1use crate::{
2 markdown_elements::*,
3 markdown_minifier::{Minifier, MinifierOptions},
4};
5use async_recursion::async_recursion;
6use collections::FxHashMap;
7use gpui::{DefiniteLength, FontWeight, px, relative};
8use html5ever::{ParseOpts, local_name, parse_document, tendril::TendrilSink};
9use language::LanguageRegistry;
10use markup5ever_rcdom::RcDom;
11use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
12use std::{
13 cell::RefCell, collections::HashMap, mem, ops::Range, path::PathBuf, rc::Rc, sync::Arc, vec,
14};
15use ui::SharedString;
16
17pub async fn parse_markdown(
18 markdown_input: &str,
19 file_location_directory: Option<PathBuf>,
20 language_registry: Option<Arc<LanguageRegistry>>,
21) -> ParsedMarkdown {
22 let mut options = Options::all();
23 options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
24
25 let parser = Parser::new_ext(markdown_input, options);
26 let parser = MarkdownParser::new(
27 parser.into_offset_iter().collect(),
28 file_location_directory,
29 language_registry,
30 );
31 let renderer = parser.parse_document().await;
32 ParsedMarkdown {
33 children: renderer.parsed,
34 }
35}
36
37fn cleanup_html(source: &str) -> Vec<u8> {
38 let mut writer = std::io::Cursor::new(Vec::new());
39 let mut reader = std::io::Cursor::new(source);
40 let mut minify = Minifier::new(
41 &mut writer,
42 MinifierOptions {
43 omit_doctype: true,
44 collapse_whitespace: true,
45 ..Default::default()
46 },
47 );
48 if let Ok(()) = minify.minify(&mut reader) {
49 writer.into_inner()
50 } else {
51 source.bytes().collect()
52 }
53}
54
55struct MarkdownParser<'a> {
56 tokens: Vec<(Event<'a>, Range<usize>)>,
57 /// The current index in the tokens array
58 cursor: usize,
59 /// The blocks that we have successfully parsed so far
60 parsed: Vec<ParsedMarkdownElement>,
61 file_location_directory: Option<PathBuf>,
62 language_registry: Option<Arc<LanguageRegistry>>,
63}
64
/// State threaded through the recursive HTML node walk.
#[derive(Debug)]
struct ParseHtmlNodeContext {
    /// Depth handed to the HTML list extraction when a `<ul>`/`<ol>` is met.
    list_item_depth: u16,
}
69
70impl Default for ParseHtmlNodeContext {
71 fn default() -> Self {
72 Self { list_item_depth: 1 }
73 }
74}
75
76struct MarkdownListItem {
77 content: Vec<ParsedMarkdownElement>,
78 item_type: ParsedMarkdownListItemType,
79}
80
81impl Default for MarkdownListItem {
82 fn default() -> Self {
83 Self {
84 content: Vec::new(),
85 item_type: ParsedMarkdownListItemType::Unordered,
86 }
87 }
88}
89
90impl<'a> MarkdownParser<'a> {
91 fn new(
92 tokens: Vec<(Event<'a>, Range<usize>)>,
93 file_location_directory: Option<PathBuf>,
94 language_registry: Option<Arc<LanguageRegistry>>,
95 ) -> Self {
96 Self {
97 tokens,
98 file_location_directory,
99 language_registry,
100 cursor: 0,
101 parsed: vec![],
102 }
103 }
104
105 fn eof(&self) -> bool {
106 if self.tokens.is_empty() {
107 return true;
108 }
109 self.cursor >= self.tokens.len() - 1
110 }
111
112 fn peek(&self, steps: usize) -> Option<&(Event<'_>, Range<usize>)> {
113 if self.eof() || (steps + self.cursor) >= self.tokens.len() {
114 return self.tokens.last();
115 }
116 self.tokens.get(self.cursor + steps)
117 }
118
119 fn previous(&self) -> Option<&(Event<'_>, Range<usize>)> {
120 if self.cursor == 0 || self.cursor > self.tokens.len() {
121 return None;
122 }
123 self.tokens.get(self.cursor - 1)
124 }
125
126 fn current(&self) -> Option<&(Event<'_>, Range<usize>)> {
127 self.peek(0)
128 }
129
130 fn current_event(&self) -> Option<&Event<'_>> {
131 self.current().map(|(event, _)| event)
132 }
133
134 fn is_text_like(event: &Event) -> bool {
135 match event {
136 Event::Text(_)
137 // Represent an inline code block
138 | Event::Code(_)
139 | Event::Html(_)
140 | Event::InlineHtml(_)
141 | Event::FootnoteReference(_)
142 | Event::Start(Tag::Link { .. })
143 | Event::Start(Tag::Emphasis)
144 | Event::Start(Tag::Strong)
145 | Event::Start(Tag::Strikethrough)
146 | Event::Start(Tag::Image { .. }) => {
147 true
148 }
149 _ => false,
150 }
151 }
152
153 async fn parse_document(mut self) -> Self {
154 while !self.eof() {
155 if let Some(block) = self.parse_block().await {
156 self.parsed.extend(block);
157 } else {
158 self.cursor += 1;
159 }
160 }
161 self
162 }
163
164 #[async_recursion]
165 async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
166 let (current, source_range) = self.current().unwrap();
167 let source_range = source_range.clone();
168 match current {
169 Event::Start(tag) => match tag {
170 Tag::Paragraph => {
171 self.cursor += 1;
172 let text = self.parse_text(false, Some(source_range));
173 Some(vec![ParsedMarkdownElement::Paragraph(text)])
174 }
175 Tag::Heading { level, .. } => {
176 let level = *level;
177 self.cursor += 1;
178 let heading = self.parse_heading(level);
179 Some(vec![ParsedMarkdownElement::Heading(heading)])
180 }
181 Tag::Table(alignment) => {
182 let alignment = alignment.clone();
183 self.cursor += 1;
184 let table = self.parse_table(alignment);
185 Some(vec![ParsedMarkdownElement::Table(table)])
186 }
187 Tag::List(order) => {
188 let order = *order;
189 self.cursor += 1;
190 let list = self.parse_list(order).await;
191 Some(list)
192 }
193 Tag::BlockQuote(_kind) => {
194 self.cursor += 1;
195 let block_quote = self.parse_block_quote().await;
196 Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
197 }
198 Tag::CodeBlock(kind) => {
199 let (language, scale) = match kind {
200 pulldown_cmark::CodeBlockKind::Indented => (None, None),
201 pulldown_cmark::CodeBlockKind::Fenced(language) => {
202 if language.is_empty() {
203 (None, None)
204 } else {
205 let parts: Vec<&str> = language.split_whitespace().collect();
206 let lang = parts.first().map(|s| s.to_string());
207 let scale = parts.get(1).and_then(|s| s.parse::<u32>().ok());
208 (lang, scale)
209 }
210 }
211 };
212
213 self.cursor += 1;
214
215 if language.as_deref() == Some("mermaid") {
216 let mermaid_diagram = self.parse_mermaid_diagram(scale).await?;
217 Some(vec![ParsedMarkdownElement::MermaidDiagram(mermaid_diagram)])
218 } else {
219 let code_block = self.parse_code_block(language).await?;
220 Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
221 }
222 }
223 Tag::HtmlBlock => {
224 self.cursor += 1;
225
226 Some(self.parse_html_block().await)
227 }
228 _ => None,
229 },
230 Event::Rule => {
231 self.cursor += 1;
232 Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
233 }
234 _ => None,
235 }
236 }
237
238 fn parse_text(
239 &mut self,
240 should_complete_on_soft_break: bool,
241 source_range: Option<Range<usize>>,
242 ) -> MarkdownParagraph {
243 let source_range = source_range.unwrap_or_else(|| {
244 self.current()
245 .map(|(_, range)| range.clone())
246 .unwrap_or_default()
247 });
248
249 let mut markdown_text_like = Vec::new();
250 let mut text = String::new();
251 let mut bold_depth = 0;
252 let mut italic_depth = 0;
253 let mut strikethrough_depth = 0;
254 let mut link: Option<Link> = None;
255 let mut image: Option<Image> = None;
256 let mut regions: Vec<(Range<usize>, ParsedRegion)> = vec![];
257 let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
258 let mut link_urls: Vec<String> = vec![];
259 let mut link_ranges: Vec<Range<usize>> = vec![];
260
261 loop {
262 if self.eof() {
263 break;
264 }
265
266 let (current, _) = self.current().unwrap();
267 let prev_len = text.len();
268 match current {
269 Event::SoftBreak => {
270 if should_complete_on_soft_break {
271 break;
272 }
273 text.push(' ');
274 }
275
276 Event::HardBreak => {
277 text.push('\n');
278 }
279
280 // We want to ignore any inline HTML tags in the text but keep
281 // the text between them
282 Event::InlineHtml(_) => {}
283
284 Event::Text(t) => {
285 text.push_str(t.as_ref());
286 let mut style = MarkdownHighlightStyle::default();
287
288 if bold_depth > 0 {
289 style.weight = FontWeight::BOLD;
290 }
291
292 if italic_depth > 0 {
293 style.italic = true;
294 }
295
296 if strikethrough_depth > 0 {
297 style.strikethrough = true;
298 }
299
300 let last_run_len = if let Some(link) = link.clone() {
301 regions.push((
302 prev_len..text.len(),
303 ParsedRegion {
304 code: false,
305 link: Some(link),
306 },
307 ));
308 style.link = true;
309 prev_len
310 } else {
311 // Manually scan for links
312 let mut finder = linkify::LinkFinder::new();
313 finder.kinds(&[linkify::LinkKind::Url]);
314 let mut last_link_len = prev_len;
315 for link in finder.links(t) {
316 let start = prev_len + link.start();
317 let end = prev_len + link.end();
318 let range = start..end;
319 link_ranges.push(range.clone());
320 link_urls.push(link.as_str().to_string());
321
322 // If there is a style before we match a link, we have to add this to the highlighted ranges
323 if style != MarkdownHighlightStyle::default() && last_link_len < start {
324 highlights.push((
325 last_link_len..start,
326 MarkdownHighlight::Style(style.clone()),
327 ));
328 }
329
330 highlights.push((
331 range.clone(),
332 MarkdownHighlight::Style(MarkdownHighlightStyle {
333 underline: true,
334 ..style
335 }),
336 ));
337
338 regions.push((
339 range.clone(),
340 ParsedRegion {
341 code: false,
342 link: Some(Link::Web {
343 url: link.as_str().to_string(),
344 }),
345 },
346 ));
347 last_link_len = end;
348 }
349 last_link_len
350 };
351
352 if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
353 let mut new_highlight = true;
354 if let Some((last_range, last_style)) = highlights.last_mut()
355 && last_range.end == last_run_len
356 && last_style == &MarkdownHighlight::Style(style.clone())
357 {
358 last_range.end = text.len();
359 new_highlight = false;
360 }
361 if new_highlight {
362 highlights.push((
363 last_run_len..text.len(),
364 MarkdownHighlight::Style(style.clone()),
365 ));
366 }
367 }
368 }
369 Event::Code(t) => {
370 text.push_str(t.as_ref());
371 let range = prev_len..text.len();
372
373 if link.is_some() {
374 highlights.push((
375 range.clone(),
376 MarkdownHighlight::Style(MarkdownHighlightStyle {
377 link: true,
378 ..Default::default()
379 }),
380 ));
381 }
382 regions.push((
383 range,
384 ParsedRegion {
385 code: true,
386 link: link.clone(),
387 },
388 ));
389 }
390 Event::Start(tag) => match tag {
391 Tag::Emphasis => italic_depth += 1,
392 Tag::Strong => bold_depth += 1,
393 Tag::Strikethrough => strikethrough_depth += 1,
394 Tag::Link { dest_url, .. } => {
395 link = Link::identify(
396 self.file_location_directory.clone(),
397 dest_url.to_string(),
398 );
399 }
400 Tag::Image { dest_url, .. } => {
401 if !text.is_empty() {
402 let parsed_regions = MarkdownParagraphChunk::Text(ParsedMarkdownText {
403 source_range: source_range.clone(),
404 contents: mem::take(&mut text).into(),
405 highlights: mem::take(&mut highlights),
406 regions: mem::take(&mut regions),
407 });
408 markdown_text_like.push(parsed_regions);
409 }
410 image = Image::identify(
411 dest_url.to_string(),
412 source_range.clone(),
413 self.file_location_directory.clone(),
414 );
415 }
416 _ => {
417 break;
418 }
419 },
420
421 Event::End(tag) => match tag {
422 TagEnd::Emphasis => italic_depth -= 1,
423 TagEnd::Strong => bold_depth -= 1,
424 TagEnd::Strikethrough => strikethrough_depth -= 1,
425 TagEnd::Link => {
426 link = None;
427 }
428 TagEnd::Image => {
429 if let Some(mut image) = image.take() {
430 if !text.is_empty() {
431 image.set_alt_text(std::mem::take(&mut text).into());
432 mem::take(&mut highlights);
433 mem::take(&mut regions);
434 }
435 markdown_text_like.push(MarkdownParagraphChunk::Image(image));
436 }
437 }
438 TagEnd::Paragraph => {
439 self.cursor += 1;
440 break;
441 }
442 _ => {
443 break;
444 }
445 },
446 _ => {
447 break;
448 }
449 }
450
451 self.cursor += 1;
452 }
453 if !text.is_empty() {
454 markdown_text_like.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
455 source_range,
456 contents: text.into(),
457 highlights,
458 regions,
459 }));
460 }
461 markdown_text_like
462 }
463
464 fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
465 let (_event, source_range) = self.previous().unwrap();
466 let source_range = source_range.clone();
467 let text = self.parse_text(true, None);
468
469 // Advance past the heading end tag
470 self.cursor += 1;
471
472 ParsedMarkdownHeading {
473 source_range,
474 level: match level {
475 pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
476 pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
477 pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
478 pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
479 pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
480 pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
481 },
482 contents: text,
483 }
484 }
485
486 fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
487 let (_event, source_range) = self.previous().unwrap();
488 let source_range = source_range.clone();
489 let mut header = vec![];
490 let mut body = vec![];
491 let mut row_columns = vec![];
492 let mut in_header = true;
493 let column_alignments = alignment
494 .iter()
495 .map(Self::convert_alignment)
496 .collect::<Vec<_>>();
497
498 loop {
499 if self.eof() {
500 break;
501 }
502
503 let (current, source_range) = self.current().unwrap();
504 let source_range = source_range.clone();
505 match current {
506 Event::Start(Tag::TableHead)
507 | Event::Start(Tag::TableRow)
508 | Event::End(TagEnd::TableCell) => {
509 self.cursor += 1;
510 }
511 Event::Start(Tag::TableCell) => {
512 self.cursor += 1;
513 let cell_contents = self.parse_text(false, Some(source_range));
514 row_columns.push(ParsedMarkdownTableColumn {
515 col_span: 1,
516 row_span: 1,
517 is_header: in_header,
518 children: cell_contents,
519 alignment: column_alignments
520 .get(row_columns.len())
521 .copied()
522 .unwrap_or_default(),
523 });
524 }
525 Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
526 self.cursor += 1;
527 let columns = std::mem::take(&mut row_columns);
528 if in_header {
529 header.push(ParsedMarkdownTableRow { columns: columns });
530 in_header = false;
531 } else {
532 body.push(ParsedMarkdownTableRow::with_columns(columns));
533 }
534 }
535 Event::End(TagEnd::Table) => {
536 self.cursor += 1;
537 break;
538 }
539 _ => {
540 break;
541 }
542 }
543 }
544
545 ParsedMarkdownTable {
546 source_range,
547 header,
548 body,
549 caption: None,
550 }
551 }
552
553 fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
554 match alignment {
555 Alignment::None => ParsedMarkdownTableAlignment::None,
556 Alignment::Left => ParsedMarkdownTableAlignment::Left,
557 Alignment::Center => ParsedMarkdownTableAlignment::Center,
558 Alignment::Right => ParsedMarkdownTableAlignment::Right,
559 }
560 }
561
562 async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
563 let (_, list_source_range) = self.previous().unwrap();
564
565 let mut items = Vec::new();
566 let mut items_stack = vec![MarkdownListItem::default()];
567 let mut depth = 1;
568 let mut order = order;
569 let mut order_stack = Vec::new();
570
571 let mut insertion_indices = FxHashMap::default();
572 let mut source_ranges = FxHashMap::default();
573 let mut start_item_range = list_source_range.clone();
574
575 while !self.eof() {
576 let (current, source_range) = self.current().unwrap();
577 match current {
578 Event::Start(Tag::List(new_order)) => {
579 if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
580 insertion_indices.insert(depth, items.len());
581 }
582
583 // We will use the start of the nested list as the end for the current item's range,
584 // because we don't care about the hierarchy of list items
585 if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
586 e.insert(start_item_range.start..source_range.start);
587 }
588
589 order_stack.push(order);
590 order = *new_order;
591 self.cursor += 1;
592 depth += 1;
593 }
594 Event::End(TagEnd::List(_)) => {
595 order = order_stack.pop().flatten();
596 self.cursor += 1;
597 depth -= 1;
598
599 if depth == 0 {
600 break;
601 }
602 }
603 Event::Start(Tag::Item) => {
604 start_item_range = source_range.clone();
605
606 self.cursor += 1;
607 items_stack.push(MarkdownListItem::default());
608
609 let mut task_list = None;
610 // Check for task list marker (`- [ ]` or `- [x]`)
611 if let Some(event) = self.current_event() {
612 // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
613 if event == &Event::Start(Tag::Paragraph) {
614 self.cursor += 1;
615 }
616
617 if let Some((Event::TaskListMarker(checked), range)) = self.current() {
618 task_list = Some((*checked, range.clone()));
619 self.cursor += 1;
620 }
621 }
622
623 if let Some((event, range)) = self.current() {
624 // This is a plain list item.
625 // For example `- some text` or `1. [Docs](./docs.md)`
626 if MarkdownParser::is_text_like(event) {
627 let text = self.parse_text(false, Some(range.clone()));
628 let block = ParsedMarkdownElement::Paragraph(text);
629 if let Some(content) = items_stack.last_mut() {
630 let item_type = if let Some((checked, range)) = task_list {
631 ParsedMarkdownListItemType::Task(checked, range)
632 } else if let Some(order) = order {
633 ParsedMarkdownListItemType::Ordered(order)
634 } else {
635 ParsedMarkdownListItemType::Unordered
636 };
637 content.item_type = item_type;
638 content.content.push(block);
639 }
640 } else {
641 let block = self.parse_block().await;
642 if let Some(block) = block
643 && let Some(list_item) = items_stack.last_mut()
644 {
645 list_item.content.extend(block);
646 }
647 }
648 }
649
650 // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
651 if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
652 self.cursor += 1;
653 }
654 }
655 Event::End(TagEnd::Item) => {
656 self.cursor += 1;
657
658 if let Some(current) = order {
659 order = Some(current + 1);
660 }
661
662 if let Some(list_item) = items_stack.pop() {
663 let source_range = source_ranges
664 .remove(&depth)
665 .unwrap_or(start_item_range.clone());
666
667 // We need to remove the last character of the source range, because it includes the newline character
668 let source_range = source_range.start..source_range.end - 1;
669 let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
670 source_range,
671 content: list_item.content,
672 depth,
673 item_type: list_item.item_type,
674 nested: false,
675 });
676
677 if let Some(index) = insertion_indices.get(&depth) {
678 items.insert(*index, item);
679 insertion_indices.remove(&depth);
680 } else {
681 items.push(item);
682 }
683 }
684 }
685 _ => {
686 if depth == 0 {
687 break;
688 }
689 // This can only happen if a list item starts with more then one paragraph,
690 // or the list item contains blocks that should be rendered after the nested list items
691 let block = self.parse_block().await;
692 if let Some(block) = block {
693 if let Some(list_item) = items_stack.last_mut() {
694 // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
695 if !insertion_indices.contains_key(&depth) {
696 list_item.content.extend(block);
697 continue;
698 }
699 }
700
701 // Otherwise we need to insert the block after all the nested items
702 // that have been parsed so far
703 items.extend(block);
704 } else {
705 self.cursor += 1;
706 }
707 }
708 }
709 }
710
711 items
712 }
713
714 #[async_recursion]
715 async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
716 let (_event, source_range) = self.previous().unwrap();
717 let source_range = source_range.clone();
718 let mut nested_depth = 1;
719
720 let mut children: Vec<ParsedMarkdownElement> = vec![];
721
722 while !self.eof() {
723 let block = self.parse_block().await;
724
725 if let Some(block) = block {
726 children.extend(block);
727 } else {
728 break;
729 }
730
731 if self.eof() {
732 break;
733 }
734
735 let (current, _source_range) = self.current().unwrap();
736 match current {
737 // This is a nested block quote.
738 // Record that we're in a nested block quote and continue parsing.
739 // We don't need to advance the cursor since the next
740 // call to `parse_block` will handle it.
741 Event::Start(Tag::BlockQuote(_kind)) => {
742 nested_depth += 1;
743 }
744 Event::End(TagEnd::BlockQuote(_kind)) => {
745 nested_depth -= 1;
746 if nested_depth == 0 {
747 self.cursor += 1;
748 break;
749 }
750 }
751 _ => {}
752 };
753 }
754
755 ParsedMarkdownBlockQuote {
756 source_range,
757 children,
758 }
759 }
760
761 async fn parse_code_block(
762 &mut self,
763 language: Option<String>,
764 ) -> Option<ParsedMarkdownCodeBlock> {
765 let Some((_event, source_range)) = self.previous() else {
766 return None;
767 };
768
769 let source_range = source_range.clone();
770 let mut code = String::new();
771
772 while !self.eof() {
773 let Some((current, _source_range)) = self.current() else {
774 break;
775 };
776
777 match current {
778 Event::Text(text) => {
779 code.push_str(text);
780 self.cursor += 1;
781 }
782 Event::End(TagEnd::CodeBlock) => {
783 self.cursor += 1;
784 break;
785 }
786 _ => {
787 break;
788 }
789 }
790 }
791
792 code = code.strip_suffix('\n').unwrap_or(&code).to_string();
793
794 let highlights = if let Some(language) = &language {
795 if let Some(registry) = &self.language_registry {
796 let rope: language::Rope = code.as_str().into();
797 registry
798 .language_for_name_or_extension(language)
799 .await
800 .map(|l| l.highlight_text(&rope, 0..code.len()))
801 .ok()
802 } else {
803 None
804 }
805 } else {
806 None
807 };
808
809 Some(ParsedMarkdownCodeBlock {
810 source_range,
811 contents: code.into(),
812 language,
813 highlights,
814 })
815 }
816
817 async fn parse_mermaid_diagram(
818 &mut self,
819 scale: Option<u32>,
820 ) -> Option<ParsedMarkdownMermaidDiagram> {
821 let Some((_event, source_range)) = self.previous() else {
822 return None;
823 };
824
825 let source_range = source_range.clone();
826 let mut code = String::new();
827
828 while !self.eof() {
829 let Some((current, _source_range)) = self.current() else {
830 break;
831 };
832
833 match current {
834 Event::Text(text) => {
835 code.push_str(text);
836 self.cursor += 1;
837 }
838 Event::End(TagEnd::CodeBlock) => {
839 self.cursor += 1;
840 break;
841 }
842 _ => {
843 break;
844 }
845 }
846 }
847
848 code = code.strip_suffix('\n').unwrap_or(&code).to_string();
849
850 let scale = scale.unwrap_or(100).clamp(10, 500);
851
852 Some(ParsedMarkdownMermaidDiagram {
853 source_range,
854 contents: ParsedMarkdownMermaidDiagramContents {
855 contents: code.into(),
856 scale,
857 },
858 })
859 }
860
861 async fn parse_html_block(&mut self) -> Vec<ParsedMarkdownElement> {
862 let mut elements = Vec::new();
863 let Some((_event, _source_range)) = self.previous() else {
864 return elements;
865 };
866
867 let mut html_source_range_start = None;
868 let mut html_source_range_end = None;
869 let mut html_buffer = String::new();
870
871 while !self.eof() {
872 let Some((current, source_range)) = self.current() else {
873 break;
874 };
875 let source_range = source_range.clone();
876 match current {
877 Event::Html(html) => {
878 html_source_range_start.get_or_insert(source_range.start);
879 html_source_range_end = Some(source_range.end);
880 html_buffer.push_str(html);
881 self.cursor += 1;
882 }
883 Event::End(TagEnd::CodeBlock) => {
884 self.cursor += 1;
885 break;
886 }
887 _ => {
888 break;
889 }
890 }
891 }
892
893 let bytes = cleanup_html(&html_buffer);
894
895 let mut cursor = std::io::Cursor::new(bytes);
896 if let Ok(dom) = parse_document(RcDom::default(), ParseOpts::default())
897 .from_utf8()
898 .read_from(&mut cursor)
899 && let Some((start, end)) = html_source_range_start.zip(html_source_range_end)
900 {
901 self.parse_html_node(
902 start..end,
903 &dom.document,
904 &mut elements,
905 &ParseHtmlNodeContext::default(),
906 );
907 }
908
909 elements
910 }
911
912 fn parse_html_node(
913 &self,
914 source_range: Range<usize>,
915 node: &Rc<markup5ever_rcdom::Node>,
916 elements: &mut Vec<ParsedMarkdownElement>,
917 context: &ParseHtmlNodeContext,
918 ) {
919 match &node.data {
920 markup5ever_rcdom::NodeData::Document => {
921 self.consume_children(source_range, node, elements, context);
922 }
923 markup5ever_rcdom::NodeData::Text { contents } => {
924 elements.push(ParsedMarkdownElement::Paragraph(vec![
925 MarkdownParagraphChunk::Text(ParsedMarkdownText {
926 source_range,
927 regions: Vec::default(),
928 highlights: Vec::default(),
929 contents: contents.borrow().to_string().into(),
930 }),
931 ]));
932 }
933 markup5ever_rcdom::NodeData::Comment { .. } => {}
934 markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
935 let mut styles = if let Some(styles) = Self::markdown_style_from_html_styles(
936 Self::extract_styles_from_attributes(attrs),
937 ) {
938 vec![MarkdownHighlight::Style(styles)]
939 } else {
940 Vec::default()
941 };
942
943 if local_name!("img") == name.local {
944 if let Some(image) = self.extract_image(source_range, attrs) {
945 elements.push(ParsedMarkdownElement::Image(image));
946 }
947 } else if local_name!("p") == name.local {
948 let mut paragraph = MarkdownParagraph::new();
949 self.parse_paragraph(
950 source_range,
951 node,
952 &mut paragraph,
953 &mut styles,
954 &mut Vec::new(),
955 );
956
957 if !paragraph.is_empty() {
958 elements.push(ParsedMarkdownElement::Paragraph(paragraph));
959 }
960 } else if matches!(
961 name.local,
962 local_name!("h1")
963 | local_name!("h2")
964 | local_name!("h3")
965 | local_name!("h4")
966 | local_name!("h5")
967 | local_name!("h6")
968 ) {
969 let mut paragraph = MarkdownParagraph::new();
970 self.consume_paragraph(
971 source_range.clone(),
972 node,
973 &mut paragraph,
974 &mut styles,
975 &mut Vec::new(),
976 );
977
978 if !paragraph.is_empty() {
979 elements.push(ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
980 source_range,
981 level: match name.local {
982 local_name!("h1") => HeadingLevel::H1,
983 local_name!("h2") => HeadingLevel::H2,
984 local_name!("h3") => HeadingLevel::H3,
985 local_name!("h4") => HeadingLevel::H4,
986 local_name!("h5") => HeadingLevel::H5,
987 local_name!("h6") => HeadingLevel::H6,
988 _ => unreachable!(),
989 },
990 contents: paragraph,
991 }));
992 }
993 } else if local_name!("ul") == name.local || local_name!("ol") == name.local {
994 if let Some(list_items) = self.extract_html_list(
995 node,
996 local_name!("ol") == name.local,
997 context.list_item_depth,
998 source_range,
999 ) {
1000 elements.extend(list_items);
1001 }
1002 } else if local_name!("blockquote") == name.local {
1003 if let Some(blockquote) = self.extract_html_blockquote(node, source_range) {
1004 elements.push(ParsedMarkdownElement::BlockQuote(blockquote));
1005 }
1006 } else if local_name!("table") == name.local {
1007 if let Some(table) = self.extract_html_table(node, source_range) {
1008 elements.push(ParsedMarkdownElement::Table(table));
1009 }
1010 } else {
1011 self.consume_children(source_range, node, elements, context);
1012 }
1013 }
1014 _ => {}
1015 }
1016 }
1017
1018 fn parse_paragraph(
1019 &self,
1020 source_range: Range<usize>,
1021 node: &Rc<markup5ever_rcdom::Node>,
1022 paragraph: &mut MarkdownParagraph,
1023 highlights: &mut Vec<MarkdownHighlight>,
1024 regions: &mut Vec<(Range<usize>, ParsedRegion)>,
1025 ) {
1026 fn items_with_range<T>(
1027 range: Range<usize>,
1028 items: impl IntoIterator<Item = T>,
1029 ) -> Vec<(Range<usize>, T)> {
1030 items
1031 .into_iter()
1032 .map(|item| (range.clone(), item))
1033 .collect()
1034 }
1035
1036 match &node.data {
1037 markup5ever_rcdom::NodeData::Text { contents } => {
1038 // append the text to the last chunk, so we can have a hacky version
1039 // of inline text with highlighting
1040 if let Some(text) = paragraph.iter_mut().last().and_then(|p| match p {
1041 MarkdownParagraphChunk::Text(text) => Some(text),
1042 _ => None,
1043 }) {
1044 let mut new_text = text.contents.to_string();
1045 new_text.push_str(&contents.borrow());
1046
1047 text.highlights.extend(items_with_range(
1048 text.contents.len()..new_text.len(),
1049 std::mem::take(highlights),
1050 ));
1051 text.regions.extend(items_with_range(
1052 text.contents.len()..new_text.len(),
1053 std::mem::take(regions)
1054 .into_iter()
1055 .map(|(_, region)| region),
1056 ));
1057 text.contents = SharedString::from(new_text);
1058 } else {
1059 let contents = contents.borrow().to_string();
1060 paragraph.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
1061 source_range,
1062 highlights: items_with_range(0..contents.len(), std::mem::take(highlights)),
1063 regions: items_with_range(
1064 0..contents.len(),
1065 std::mem::take(regions)
1066 .into_iter()
1067 .map(|(_, region)| region),
1068 ),
1069 contents: contents.into(),
1070 }));
1071 }
1072 }
1073 markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
1074 if local_name!("img") == name.local {
1075 if let Some(image) = self.extract_image(source_range, attrs) {
1076 paragraph.push(MarkdownParagraphChunk::Image(image));
1077 }
1078 } else if local_name!("b") == name.local || local_name!("strong") == name.local {
1079 highlights.push(MarkdownHighlight::Style(MarkdownHighlightStyle {
1080 weight: FontWeight::BOLD,
1081 ..Default::default()
1082 }));
1083
1084 self.consume_paragraph(source_range, node, paragraph, highlights, regions);
1085 } else if local_name!("i") == name.local {
1086 highlights.push(MarkdownHighlight::Style(MarkdownHighlightStyle {
1087 italic: true,
1088 ..Default::default()
1089 }));
1090
1091 self.consume_paragraph(source_range, node, paragraph, highlights, regions);
1092 } else if local_name!("em") == name.local {
1093 highlights.push(MarkdownHighlight::Style(MarkdownHighlightStyle {
1094 oblique: true,
1095 ..Default::default()
1096 }));
1097
1098 self.consume_paragraph(source_range, node, paragraph, highlights, regions);
1099 } else if local_name!("del") == name.local {
1100 highlights.push(MarkdownHighlight::Style(MarkdownHighlightStyle {
1101 strikethrough: true,
1102 ..Default::default()
1103 }));
1104
1105 self.consume_paragraph(source_range, node, paragraph, highlights, regions);
1106 } else if local_name!("ins") == name.local {
1107 highlights.push(MarkdownHighlight::Style(MarkdownHighlightStyle {
1108 underline: true,
1109 ..Default::default()
1110 }));
1111
1112 self.consume_paragraph(source_range, node, paragraph, highlights, regions);
1113 } else if local_name!("a") == name.local {
1114 if let Some(url) = Self::attr_value(attrs, local_name!("href"))
1115 && let Some(link) =
1116 Link::identify(self.file_location_directory.clone(), url)
1117 {
1118 highlights.push(MarkdownHighlight::Style(MarkdownHighlightStyle {
1119 link: true,
1120 ..Default::default()
1121 }));
1122
1123 regions.push((
1124 source_range.clone(),
1125 ParsedRegion {
1126 code: false,
1127 link: Some(link),
1128 },
1129 ));
1130 }
1131
1132 self.consume_paragraph(source_range, node, paragraph, highlights, regions);
1133 } else {
1134 self.consume_paragraph(source_range, node, paragraph, highlights, regions);
1135 }
1136 }
1137 _ => {}
1138 }
1139 }
1140
1141 fn consume_paragraph(
1142 &self,
1143 source_range: Range<usize>,
1144 node: &Rc<markup5ever_rcdom::Node>,
1145 paragraph: &mut MarkdownParagraph,
1146 highlights: &mut Vec<MarkdownHighlight>,
1147 regions: &mut Vec<(Range<usize>, ParsedRegion)>,
1148 ) {
1149 for node in node.children.borrow().iter() {
1150 self.parse_paragraph(source_range.clone(), node, paragraph, highlights, regions);
1151 }
1152 }
1153
1154 fn parse_table_row(
1155 &self,
1156 source_range: Range<usize>,
1157 node: &Rc<markup5ever_rcdom::Node>,
1158 ) -> Option<ParsedMarkdownTableRow> {
1159 let mut columns = Vec::new();
1160
1161 match &node.data {
1162 markup5ever_rcdom::NodeData::Element { name, .. } => {
1163 if local_name!("tr") != name.local {
1164 return None;
1165 }
1166
1167 for node in node.children.borrow().iter() {
1168 if let Some(column) = self.parse_table_column(source_range.clone(), node) {
1169 columns.push(column);
1170 }
1171 }
1172 }
1173 _ => {}
1174 }
1175
1176 if columns.is_empty() {
1177 None
1178 } else {
1179 Some(ParsedMarkdownTableRow { columns })
1180 }
1181 }
1182
1183 fn parse_table_column(
1184 &self,
1185 source_range: Range<usize>,
1186 node: &Rc<markup5ever_rcdom::Node>,
1187 ) -> Option<ParsedMarkdownTableColumn> {
1188 match &node.data {
1189 markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
1190 if !matches!(name.local, local_name!("th") | local_name!("td")) {
1191 return None;
1192 }
1193
1194 let mut children = MarkdownParagraph::new();
1195 self.consume_paragraph(
1196 source_range,
1197 node,
1198 &mut children,
1199 &mut Vec::new(),
1200 &mut Vec::new(),
1201 );
1202
1203 let is_header = matches!(name.local, local_name!("th"));
1204
1205 Some(ParsedMarkdownTableColumn {
1206 col_span: std::cmp::max(
1207 Self::attr_value(attrs, local_name!("colspan"))
1208 .and_then(|span| span.parse().ok())
1209 .unwrap_or(1),
1210 1,
1211 ),
1212 row_span: std::cmp::max(
1213 Self::attr_value(attrs, local_name!("rowspan"))
1214 .and_then(|span| span.parse().ok())
1215 .unwrap_or(1),
1216 1,
1217 ),
1218 is_header,
1219 children,
1220 alignment: Self::attr_value(attrs, local_name!("align"))
1221 .and_then(|align| match align.as_str() {
1222 "left" => Some(ParsedMarkdownTableAlignment::Left),
1223 "center" => Some(ParsedMarkdownTableAlignment::Center),
1224 "right" => Some(ParsedMarkdownTableAlignment::Right),
1225 _ => None,
1226 })
1227 .unwrap_or_else(|| {
1228 if is_header {
1229 ParsedMarkdownTableAlignment::Center
1230 } else {
1231 ParsedMarkdownTableAlignment::default()
1232 }
1233 }),
1234 })
1235 }
1236 _ => None,
1237 }
1238 }
1239
1240 fn consume_children(
1241 &self,
1242 source_range: Range<usize>,
1243 node: &Rc<markup5ever_rcdom::Node>,
1244 elements: &mut Vec<ParsedMarkdownElement>,
1245 context: &ParseHtmlNodeContext,
1246 ) {
1247 for node in node.children.borrow().iter() {
1248 self.parse_html_node(source_range.clone(), node, elements, context);
1249 }
1250 }
1251
1252 fn attr_value(
1253 attrs: &RefCell<Vec<html5ever::Attribute>>,
1254 name: html5ever::LocalName,
1255 ) -> Option<String> {
1256 attrs.borrow().iter().find_map(|attr| {
1257 if attr.name.local == name {
1258 Some(attr.value.to_string())
1259 } else {
1260 None
1261 }
1262 })
1263 }
1264
1265 fn markdown_style_from_html_styles(
1266 styles: HashMap<String, String>,
1267 ) -> Option<MarkdownHighlightStyle> {
1268 let mut markdown_style = MarkdownHighlightStyle::default();
1269
1270 if let Some(text_decoration) = styles.get("text-decoration") {
1271 match text_decoration.to_lowercase().as_str() {
1272 "underline" => {
1273 markdown_style.underline = true;
1274 }
1275 "line-through" => {
1276 markdown_style.strikethrough = true;
1277 }
1278 _ => {}
1279 }
1280 }
1281
1282 if let Some(font_style) = styles.get("font-style") {
1283 match font_style.to_lowercase().as_str() {
1284 "italic" => {
1285 markdown_style.italic = true;
1286 }
1287 "oblique" => {
1288 markdown_style.oblique = true;
1289 }
1290 _ => {}
1291 }
1292 }
1293
1294 if let Some(font_weight) = styles.get("font-weight") {
1295 match font_weight.to_lowercase().as_str() {
1296 "bold" => {
1297 markdown_style.weight = FontWeight::BOLD;
1298 }
1299 "lighter" => {
1300 markdown_style.weight = FontWeight::THIN;
1301 }
1302 _ => {
1303 if let Some(weight) = font_weight.parse::<f32>().ok() {
1304 markdown_style.weight = FontWeight(weight);
1305 }
1306 }
1307 }
1308 }
1309
1310 if markdown_style != MarkdownHighlightStyle::default() {
1311 Some(markdown_style)
1312 } else {
1313 None
1314 }
1315 }
1316
1317 fn extract_styles_from_attributes(
1318 attrs: &RefCell<Vec<html5ever::Attribute>>,
1319 ) -> HashMap<String, String> {
1320 let mut styles = HashMap::new();
1321
1322 if let Some(style) = Self::attr_value(attrs, local_name!("style")) {
1323 for decl in style.split(';') {
1324 let mut parts = decl.splitn(2, ':');
1325 if let Some((key, value)) = parts.next().zip(parts.next()) {
1326 styles.insert(
1327 key.trim().to_lowercase().to_string(),
1328 value.trim().to_string(),
1329 );
1330 }
1331 }
1332 }
1333
1334 styles
1335 }
1336
1337 fn extract_image(
1338 &self,
1339 source_range: Range<usize>,
1340 attrs: &RefCell<Vec<html5ever::Attribute>>,
1341 ) -> Option<Image> {
1342 let src = Self::attr_value(attrs, local_name!("src"))?;
1343
1344 let mut image = Image::identify(src, source_range, self.file_location_directory.clone())?;
1345
1346 if let Some(alt) = Self::attr_value(attrs, local_name!("alt")) {
1347 image.set_alt_text(alt.into());
1348 }
1349
1350 let styles = Self::extract_styles_from_attributes(attrs);
1351
1352 if let Some(width) = Self::attr_value(attrs, local_name!("width"))
1353 .or_else(|| styles.get("width").cloned())
1354 .and_then(|width| Self::parse_html_element_dimension(&width))
1355 {
1356 image.set_width(width);
1357 }
1358
1359 if let Some(height) = Self::attr_value(attrs, local_name!("height"))
1360 .or_else(|| styles.get("height").cloned())
1361 .and_then(|height| Self::parse_html_element_dimension(&height))
1362 {
1363 image.set_height(height);
1364 }
1365
1366 Some(image)
1367 }
1368
1369 fn extract_html_list(
1370 &self,
1371 node: &Rc<markup5ever_rcdom::Node>,
1372 ordered: bool,
1373 depth: u16,
1374 source_range: Range<usize>,
1375 ) -> Option<Vec<ParsedMarkdownElement>> {
1376 let mut list_items = Vec::with_capacity(node.children.borrow().len());
1377
1378 for (index, node) in node.children.borrow().iter().enumerate() {
1379 match &node.data {
1380 markup5ever_rcdom::NodeData::Element { name, .. } => {
1381 if local_name!("li") != name.local {
1382 continue;
1383 }
1384
1385 let mut content = Vec::new();
1386 self.consume_children(
1387 source_range.clone(),
1388 node,
1389 &mut content,
1390 &ParseHtmlNodeContext {
1391 list_item_depth: depth + 1,
1392 },
1393 );
1394
1395 if !content.is_empty() {
1396 list_items.push(ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
1397 depth,
1398 source_range: source_range.clone(),
1399 item_type: if ordered {
1400 ParsedMarkdownListItemType::Ordered(index as u64 + 1)
1401 } else {
1402 ParsedMarkdownListItemType::Unordered
1403 },
1404 content,
1405 nested: true,
1406 }));
1407 }
1408 }
1409 _ => {}
1410 }
1411 }
1412
1413 if list_items.is_empty() {
1414 None
1415 } else {
1416 Some(list_items)
1417 }
1418 }
1419
1420 fn parse_html_element_dimension(value: &str) -> Option<DefiniteLength> {
1421 if value.ends_with("%") {
1422 value
1423 .trim_end_matches("%")
1424 .parse::<f32>()
1425 .ok()
1426 .map(|value| relative(value / 100.))
1427 } else {
1428 value
1429 .trim_end_matches("px")
1430 .parse()
1431 .ok()
1432 .map(|value| px(value).into())
1433 }
1434 }
1435
1436 fn extract_html_blockquote(
1437 &self,
1438 node: &Rc<markup5ever_rcdom::Node>,
1439 source_range: Range<usize>,
1440 ) -> Option<ParsedMarkdownBlockQuote> {
1441 let mut children = Vec::new();
1442 self.consume_children(
1443 source_range.clone(),
1444 node,
1445 &mut children,
1446 &ParseHtmlNodeContext::default(),
1447 );
1448
1449 if children.is_empty() {
1450 None
1451 } else {
1452 Some(ParsedMarkdownBlockQuote {
1453 children,
1454 source_range,
1455 })
1456 }
1457 }
1458
1459 fn extract_html_table(
1460 &self,
1461 node: &Rc<markup5ever_rcdom::Node>,
1462 source_range: Range<usize>,
1463 ) -> Option<ParsedMarkdownTable> {
1464 let mut header_rows = Vec::new();
1465 let mut body_rows = Vec::new();
1466 let mut caption = None;
1467
1468 // node should be a thead, tbody or caption element
1469 for node in node.children.borrow().iter() {
1470 match &node.data {
1471 markup5ever_rcdom::NodeData::Element { name, .. } => {
1472 if local_name!("caption") == name.local {
1473 let mut paragraph = MarkdownParagraph::new();
1474 self.parse_paragraph(
1475 source_range.clone(),
1476 node,
1477 &mut paragraph,
1478 &mut Vec::new(),
1479 &mut Vec::new(),
1480 );
1481 caption = Some(paragraph);
1482 }
1483 if local_name!("thead") == name.local {
1484 // node should be a tr element
1485 for node in node.children.borrow().iter() {
1486 if let Some(row) = self.parse_table_row(source_range.clone(), node) {
1487 header_rows.push(row);
1488 }
1489 }
1490 } else if local_name!("tbody") == name.local {
1491 // node should be a tr element
1492 for node in node.children.borrow().iter() {
1493 if let Some(row) = self.parse_table_row(source_range.clone(), node) {
1494 body_rows.push(row);
1495 }
1496 }
1497 }
1498 }
1499 _ => {}
1500 }
1501 }
1502
1503 if !header_rows.is_empty() || !body_rows.is_empty() {
1504 Some(ParsedMarkdownTable {
1505 source_range,
1506 body: body_rows,
1507 header: header_rows,
1508 caption,
1509 })
1510 } else {
1511 None
1512 }
1513 }
1514}
1515
1516#[cfg(test)]
1517mod tests {
1518 use super::*;
1519 use ParsedMarkdownListItemType::*;
1520 use core::panic;
1521 use gpui::{AbsoluteLength, BackgroundExecutor, DefiniteLength};
1522 use language::{HighlightId, LanguageRegistry};
1523 use pretty_assertions::assert_eq;
1524
1525 async fn parse(input: &str) -> ParsedMarkdown {
1526 parse_markdown(input, None, None).await
1527 }
1528
1529 #[gpui::test]
1530 async fn test_headings() {
1531 let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
1532
1533 assert_eq!(
1534 parsed.children,
1535 vec![
1536 h1(text("Heading one", 2..13), 0..14),
1537 h2(text("Heading two", 17..28), 14..29),
1538 h3(text("Heading three", 33..46), 29..46),
1539 ]
1540 );
1541 }
1542
1543 #[gpui::test]
1544 async fn test_newlines_dont_new_paragraphs() {
1545 let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
1546
1547 assert_eq!(
1548 parsed.children,
1549 vec![p("Some text that is bolded and italicized", 0..46)]
1550 );
1551 }
1552
1553 #[gpui::test]
1554 async fn test_heading_with_paragraph() {
1555 let parsed = parse("# Zed\nThe editor").await;
1556
1557 assert_eq!(
1558 parsed.children,
1559 vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
1560 );
1561 }
1562
1563 #[gpui::test]
1564 async fn test_double_newlines_do_new_paragraphs() {
1565 let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
1566
1567 assert_eq!(
1568 parsed.children,
1569 vec![
1570 p("Some text that is bolded", 0..29),
1571 p("and italicized", 31..47),
1572 ]
1573 );
1574 }
1575
1576 #[gpui::test]
1577 async fn test_bold_italic_text() {
1578 let parsed = parse("Some text **that is bolded** and *italicized*").await;
1579
1580 assert_eq!(
1581 parsed.children,
1582 vec![p("Some text that is bolded and italicized", 0..45)]
1583 );
1584 }
1585
1586 #[gpui::test]
1587 async fn test_nested_bold_strikethrough_text() {
1588 let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
1589
1590 assert_eq!(parsed.children.len(), 1);
1591 assert_eq!(
1592 parsed.children[0],
1593 ParsedMarkdownElement::Paragraph(vec![MarkdownParagraphChunk::Text(
1594 ParsedMarkdownText {
1595 source_range: 0..35,
1596 contents: "Some bostrikethroughld text".into(),
1597 highlights: Vec::new(),
1598 regions: Vec::new(),
1599 }
1600 )])
1601 );
1602
1603 let new_text = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1604 text
1605 } else {
1606 panic!("Expected a paragraph");
1607 };
1608
1609 let paragraph = if let MarkdownParagraphChunk::Text(text) = &new_text[0] {
1610 text
1611 } else {
1612 panic!("Expected a text");
1613 };
1614
1615 assert_eq!(
1616 paragraph.highlights,
1617 vec![
1618 (
1619 5..7,
1620 MarkdownHighlight::Style(MarkdownHighlightStyle {
1621 weight: FontWeight::BOLD,
1622 ..Default::default()
1623 }),
1624 ),
1625 (
1626 7..20,
1627 MarkdownHighlight::Style(MarkdownHighlightStyle {
1628 weight: FontWeight::BOLD,
1629 strikethrough: true,
1630 ..Default::default()
1631 }),
1632 ),
1633 (
1634 20..22,
1635 MarkdownHighlight::Style(MarkdownHighlightStyle {
1636 weight: FontWeight::BOLD,
1637 ..Default::default()
1638 }),
1639 ),
1640 ]
1641 );
1642 }
1643
1644 #[gpui::test]
1645 async fn test_html_inline_style_elements() {
1646 let parsed =
1647 parse("<p>Some text <strong>strong text</strong> more text <b>bold text</b> more text <i>italic text</i> more text <em>emphasized text</em> more text <del>deleted text</del> more text <ins>inserted text</ins></p>").await;
1648
1649 assert_eq!(1, parsed.children.len());
1650 let chunks = if let ParsedMarkdownElement::Paragraph(chunks) = &parsed.children[0] {
1651 chunks
1652 } else {
1653 panic!("Expected a paragraph");
1654 };
1655
1656 assert_eq!(1, chunks.len());
1657 let text = if let MarkdownParagraphChunk::Text(text) = &chunks[0] {
1658 text
1659 } else {
1660 panic!("Expected a paragraph");
1661 };
1662
1663 assert_eq!(0..205, text.source_range);
1664 assert_eq!(
1665 "Some text strong text more text bold text more text italic text more text emphasized text more text deleted text more text inserted text",
1666 text.contents.as_str(),
1667 );
1668 assert_eq!(
1669 vec![
1670 (
1671 10..21,
1672 MarkdownHighlight::Style(MarkdownHighlightStyle {
1673 weight: FontWeight(700.0),
1674 ..Default::default()
1675 },),
1676 ),
1677 (
1678 32..41,
1679 MarkdownHighlight::Style(MarkdownHighlightStyle {
1680 weight: FontWeight(700.0),
1681 ..Default::default()
1682 },),
1683 ),
1684 (
1685 52..63,
1686 MarkdownHighlight::Style(MarkdownHighlightStyle {
1687 italic: true,
1688 weight: FontWeight(400.0),
1689 ..Default::default()
1690 },),
1691 ),
1692 (
1693 74..89,
1694 MarkdownHighlight::Style(MarkdownHighlightStyle {
1695 weight: FontWeight(400.0),
1696 oblique: true,
1697 ..Default::default()
1698 },),
1699 ),
1700 (
1701 100..112,
1702 MarkdownHighlight::Style(MarkdownHighlightStyle {
1703 strikethrough: true,
1704 weight: FontWeight(400.0),
1705 ..Default::default()
1706 },),
1707 ),
1708 (
1709 123..136,
1710 MarkdownHighlight::Style(MarkdownHighlightStyle {
1711 underline: true,
1712 weight: FontWeight(400.0,),
1713 ..Default::default()
1714 },),
1715 ),
1716 ],
1717 text.highlights
1718 );
1719 }
1720
1721 #[gpui::test]
1722 async fn test_html_href_element() {
1723 let parsed =
1724 parse("<p>Some text <a href=\"https://example.com\">link</a> more text</p>").await;
1725
1726 assert_eq!(1, parsed.children.len());
1727 let chunks = if let ParsedMarkdownElement::Paragraph(chunks) = &parsed.children[0] {
1728 chunks
1729 } else {
1730 panic!("Expected a paragraph");
1731 };
1732
1733 assert_eq!(1, chunks.len());
1734 let text = if let MarkdownParagraphChunk::Text(text) = &chunks[0] {
1735 text
1736 } else {
1737 panic!("Expected a paragraph");
1738 };
1739
1740 assert_eq!(0..65, text.source_range);
1741 assert_eq!("Some text link more text", text.contents.as_str(),);
1742 assert_eq!(
1743 vec![(
1744 10..14,
1745 MarkdownHighlight::Style(MarkdownHighlightStyle {
1746 link: true,
1747 ..Default::default()
1748 },),
1749 )],
1750 text.highlights
1751 );
1752 assert_eq!(
1753 vec![(
1754 10..14,
1755 ParsedRegion {
1756 code: false,
1757 link: Some(Link::Web {
1758 url: "https://example.com".into()
1759 })
1760 }
1761 )],
1762 text.regions
1763 )
1764 }
1765
1766 #[gpui::test]
1767 async fn test_text_with_inline_html() {
1768 let parsed = parse("This is a paragraph with an inline HTML <sometag>tag</sometag>.").await;
1769
1770 assert_eq!(
1771 parsed.children,
1772 vec![p("This is a paragraph with an inline HTML tag.", 0..63),],
1773 );
1774 }
1775
1776 #[gpui::test]
1777 async fn test_raw_links_detection() {
1778 let parsed = parse("Checkout this https://zed.dev link").await;
1779
1780 assert_eq!(
1781 parsed.children,
1782 vec![p("Checkout this https://zed.dev link", 0..34)]
1783 );
1784 }
1785
1786 #[gpui::test]
1787 async fn test_empty_image() {
1788 let parsed = parse("![]()").await;
1789
1790 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1791 text
1792 } else {
1793 panic!("Expected a paragraph");
1794 };
1795 assert_eq!(paragraph.len(), 0);
1796 }
1797
1798 #[gpui::test]
1799 async fn test_image_links_detection() {
1800 let parsed = parse("").await;
1801
1802 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1803 text
1804 } else {
1805 panic!("Expected a paragraph");
1806 };
1807 assert_eq!(
1808 paragraph[0],
1809 MarkdownParagraphChunk::Image(Image {
1810 source_range: 0..111,
1811 link: Link::Web {
1812 url: "https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png".to_string(),
1813 },
1814 alt_text: Some("test".into()),
1815 height: None,
1816 width: None,
1817 },)
1818 );
1819 }
1820
1821 #[gpui::test]
1822 async fn test_image_alt_text() {
1823 let parsed = parse("[](https://zed.dev)\n ").await;
1824
1825 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1826 text
1827 } else {
1828 panic!("Expected a paragraph");
1829 };
1830 assert_eq!(
1831 paragraph[0],
1832 MarkdownParagraphChunk::Image(Image {
1833 source_range: 0..142,
1834 link: Link::Web {
1835 url: "https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/zed-industries/zed/main/assets/badge/v0.json".to_string(),
1836 },
1837 alt_text: Some("Zed".into()),
1838 height: None,
1839 width: None,
1840 },)
1841 );
1842 }
1843
1844 #[gpui::test]
1845 async fn test_image_without_alt_text() {
1846 let parsed = parse("").await;
1847
1848 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1849 text
1850 } else {
1851 panic!("Expected a paragraph");
1852 };
1853 assert_eq!(
1854 paragraph[0],
1855 MarkdownParagraphChunk::Image(Image {
1856 source_range: 0..31,
1857 link: Link::Web {
1858 url: "http://example.com/foo.png".to_string(),
1859 },
1860 alt_text: None,
1861 height: None,
1862 width: None,
1863 },)
1864 );
1865 }
1866
1867 #[gpui::test]
1868 async fn test_image_with_alt_text_containing_formatting() {
1869 let parsed = parse("").await;
1870
1871 let ParsedMarkdownElement::Paragraph(chunks) = &parsed.children[0] else {
1872 panic!("Expected a paragraph");
1873 };
1874 assert_eq!(
1875 chunks,
1876 &[MarkdownParagraphChunk::Image(Image {
1877 source_range: 0..44,
1878 link: Link::Web {
1879 url: "http://example.com/foo.png".to_string(),
1880 },
1881 alt_text: Some("foo bar baz".into()),
1882 height: None,
1883 width: None,
1884 }),],
1885 );
1886 }
1887
1888 #[gpui::test]
1889 async fn test_images_with_text_in_between() {
1890 let parsed = parse(
1891 "\nLorem Ipsum\n",
1892 )
1893 .await;
1894
1895 let chunks = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1896 text
1897 } else {
1898 panic!("Expected a paragraph");
1899 };
1900 assert_eq!(
1901 chunks,
1902 &vec![
1903 MarkdownParagraphChunk::Image(Image {
1904 source_range: 0..81,
1905 link: Link::Web {
1906 url: "http://example.com/foo.png".to_string(),
1907 },
1908 alt_text: Some("foo".into()),
1909 height: None,
1910 width: None,
1911 }),
1912 MarkdownParagraphChunk::Text(ParsedMarkdownText {
1913 source_range: 0..81,
1914 contents: " Lorem Ipsum ".into(),
1915 highlights: Vec::new(),
1916 regions: Vec::new(),
1917 }),
1918 MarkdownParagraphChunk::Image(Image {
1919 source_range: 0..81,
1920 link: Link::Web {
1921 url: "http://example.com/bar.png".to_string(),
1922 },
1923 alt_text: Some("bar".into()),
1924 height: None,
1925 width: None,
1926 })
1927 ]
1928 );
1929 }
1930
/// Table-driven check of `MarkdownParser::parse_html_element_dimension` for
/// percentages, pixel values, unit-less values, decimals and invalid input.
#[test]
fn test_parse_html_element_dimension() {
    let fraction = |value: f32| Some(DefiniteLength::Fraction(value));
    let pixels =
        |value: f32| Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(value))));

    let cases = [
        // Percentage values
        ("50%", fraction(0.5)),
        ("100%", fraction(1.0)),
        ("25%", fraction(0.25)),
        ("0%", fraction(0.0)),
        // Pixel values
        ("100px", pixels(100.0)),
        ("50px", pixels(50.0)),
        ("0px", pixels(0.0)),
        // Values without units are treated as pixels
        ("100", pixels(100.0)),
        ("42", pixels(42.0)),
        // Invalid values
        ("invalid", None),
        ("px", None),
        ("%", None),
        ("", None),
        ("abc%", None),
        ("abcpx", None),
        // Decimal values
        ("50.5%", fraction(0.505)),
        ("100.25px", pixels(100.25)),
        ("42.0", pixels(42.0)),
    ];

    for (input, expected) in cases {
        assert_eq!(
            MarkdownParser::parse_html_element_dimension(input),
            expected,
            "input: {input:?}"
        );
    }
}
2000
2001 #[gpui::test]
2002 async fn test_html_unordered_list() {
2003 let parsed = parse(
2004 "<ul>
2005 <li>Item 1</li>
2006 <li>Item 2</li>
2007 </ul>",
2008 )
2009 .await;
2010
2011 assert_eq!(
2012 ParsedMarkdown {
2013 children: vec![
2014 nested_list_item(
2015 0..82,
2016 1,
2017 ParsedMarkdownListItemType::Unordered,
2018 vec![ParsedMarkdownElement::Paragraph(text("Item 1", 0..82))]
2019 ),
2020 nested_list_item(
2021 0..82,
2022 1,
2023 ParsedMarkdownListItemType::Unordered,
2024 vec![ParsedMarkdownElement::Paragraph(text("Item 2", 0..82))]
2025 ),
2026 ]
2027 },
2028 parsed
2029 );
2030 }
2031
2032 #[gpui::test]
2033 async fn test_html_ordered_list() {
2034 let parsed = parse(
2035 "<ol>
2036 <li>Item 1</li>
2037 <li>Item 2</li>
2038 </ol>",
2039 )
2040 .await;
2041
2042 assert_eq!(
2043 ParsedMarkdown {
2044 children: vec![
2045 nested_list_item(
2046 0..82,
2047 1,
2048 ParsedMarkdownListItemType::Ordered(1),
2049 vec![ParsedMarkdownElement::Paragraph(text("Item 1", 0..82))]
2050 ),
2051 nested_list_item(
2052 0..82,
2053 1,
2054 ParsedMarkdownListItemType::Ordered(2),
2055 vec![ParsedMarkdownElement::Paragraph(text("Item 2", 0..82))]
2056 ),
2057 ]
2058 },
2059 parsed
2060 );
2061 }
2062
2063 #[gpui::test]
2064 async fn test_html_nested_ordered_list() {
2065 let parsed = parse(
2066 "<ol>
2067 <li>Item 1</li>
2068 <li>Item 2
2069 <ol>
2070 <li>Sub-Item 1</li>
2071 <li>Sub-Item 2</li>
2072 </ol>
2073 </li>
2074 </ol>",
2075 )
2076 .await;
2077
2078 assert_eq!(
2079 ParsedMarkdown {
2080 children: vec![
2081 nested_list_item(
2082 0..216,
2083 1,
2084 ParsedMarkdownListItemType::Ordered(1),
2085 vec![ParsedMarkdownElement::Paragraph(text("Item 1", 0..216))]
2086 ),
2087 nested_list_item(
2088 0..216,
2089 1,
2090 ParsedMarkdownListItemType::Ordered(2),
2091 vec![
2092 ParsedMarkdownElement::Paragraph(text("Item 2", 0..216)),
2093 nested_list_item(
2094 0..216,
2095 2,
2096 ParsedMarkdownListItemType::Ordered(1),
2097 vec![ParsedMarkdownElement::Paragraph(text("Sub-Item 1", 0..216))]
2098 ),
2099 nested_list_item(
2100 0..216,
2101 2,
2102 ParsedMarkdownListItemType::Ordered(2),
2103 vec![ParsedMarkdownElement::Paragraph(text("Sub-Item 2", 0..216))]
2104 ),
2105 ]
2106 ),
2107 ]
2108 },
2109 parsed
2110 );
2111 }
2112
2113 #[gpui::test]
2114 async fn test_html_nested_unordered_list() {
2115 let parsed = parse(
2116 "<ul>
2117 <li>Item 1</li>
2118 <li>Item 2
2119 <ul>
2120 <li>Sub-Item 1</li>
2121 <li>Sub-Item 2</li>
2122 </ul>
2123 </li>
2124 </ul>",
2125 )
2126 .await;
2127
2128 assert_eq!(
2129 ParsedMarkdown {
2130 children: vec![
2131 nested_list_item(
2132 0..216,
2133 1,
2134 ParsedMarkdownListItemType::Unordered,
2135 vec![ParsedMarkdownElement::Paragraph(text("Item 1", 0..216))]
2136 ),
2137 nested_list_item(
2138 0..216,
2139 1,
2140 ParsedMarkdownListItemType::Unordered,
2141 vec![
2142 ParsedMarkdownElement::Paragraph(text("Item 2", 0..216)),
2143 nested_list_item(
2144 0..216,
2145 2,
2146 ParsedMarkdownListItemType::Unordered,
2147 vec![ParsedMarkdownElement::Paragraph(text("Sub-Item 1", 0..216))]
2148 ),
2149 nested_list_item(
2150 0..216,
2151 2,
2152 ParsedMarkdownListItemType::Unordered,
2153 vec![ParsedMarkdownElement::Paragraph(text("Sub-Item 2", 0..216))]
2154 ),
2155 ]
2156 ),
2157 ]
2158 },
2159 parsed
2160 );
2161 }
2162
2163 #[gpui::test]
2164 async fn test_inline_html_image_tag() {
2165 let parsed =
2166 parse("<p>Some text<img src=\"http://example.com/foo.png\" /> some more text</p>")
2167 .await;
2168
2169 assert_eq!(
2170 ParsedMarkdown {
2171 children: vec![ParsedMarkdownElement::Paragraph(vec![
2172 MarkdownParagraphChunk::Text(ParsedMarkdownText {
2173 source_range: 0..71,
2174 contents: "Some text".into(),
2175 highlights: Default::default(),
2176 regions: Default::default()
2177 }),
2178 MarkdownParagraphChunk::Image(Image {
2179 source_range: 0..71,
2180 link: Link::Web {
2181 url: "http://example.com/foo.png".to_string(),
2182 },
2183 alt_text: None,
2184 height: None,
2185 width: None,
2186 }),
2187 MarkdownParagraphChunk::Text(ParsedMarkdownText {
2188 source_range: 0..71,
2189 contents: " some more text".into(),
2190 highlights: Default::default(),
2191 regions: Default::default()
2192 }),
2193 ])]
2194 },
2195 parsed
2196 );
2197 }
2198
2199 #[gpui::test]
2200 async fn test_html_block_quote() {
2201 let parsed = parse(
2202 "<blockquote>
2203 <p>some description</p>
2204 </blockquote>",
2205 )
2206 .await;
2207
2208 assert_eq!(
2209 ParsedMarkdown {
2210 children: vec![block_quote(
2211 vec![ParsedMarkdownElement::Paragraph(text(
2212 "some description",
2213 0..78
2214 ))],
2215 0..78,
2216 )]
2217 },
2218 parsed
2219 );
2220 }
2221
2222 #[gpui::test]
2223 async fn test_html_nested_block_quote() {
2224 let parsed = parse(
2225 "<blockquote>
2226 <p>some description</p>
2227 <blockquote>
2228 <p>second description</p>
2229 </blockquote>
2230 </blockquote>",
2231 )
2232 .await;
2233
2234 assert_eq!(
2235 ParsedMarkdown {
2236 children: vec![block_quote(
2237 vec![
2238 ParsedMarkdownElement::Paragraph(text("some description", 0..179)),
2239 block_quote(
2240 vec![ParsedMarkdownElement::Paragraph(text(
2241 "second description",
2242 0..179
2243 ))],
2244 0..179,
2245 )
2246 ],
2247 0..179,
2248 )]
2249 },
2250 parsed
2251 );
2252 }
2253
2254 #[gpui::test]
2255 async fn test_html_table() {
2256 let parsed = parse(
2257 "<table>
2258 <thead>
2259 <tr>
2260 <th>Id</th>
2261 <th>Name</th>
2262 </tr>
2263 </thead>
2264 <tbody>
2265 <tr>
2266 <td>1</td>
2267 <td>Chris</td>
2268 </tr>
2269 <tr>
2270 <td>2</td>
2271 <td>Dennis</td>
2272 </tr>
2273 </tbody>
2274 </table>",
2275 )
2276 .await;
2277
2278 assert_eq!(
2279 ParsedMarkdown {
2280 children: vec![ParsedMarkdownElement::Table(table(
2281 0..366,
2282 None,
2283 vec![row(vec![
2284 column(
2285 1,
2286 1,
2287 true,
2288 text("Id", 0..366),
2289 ParsedMarkdownTableAlignment::Center
2290 ),
2291 column(
2292 1,
2293 1,
2294 true,
2295 text("Name ", 0..366),
2296 ParsedMarkdownTableAlignment::Center
2297 )
2298 ])],
2299 vec![
2300 row(vec![
2301 column(
2302 1,
2303 1,
2304 false,
2305 text("1", 0..366),
2306 ParsedMarkdownTableAlignment::None
2307 ),
2308 column(
2309 1,
2310 1,
2311 false,
2312 text("Chris", 0..366),
2313 ParsedMarkdownTableAlignment::None
2314 )
2315 ]),
2316 row(vec![
2317 column(
2318 1,
2319 1,
2320 false,
2321 text("2", 0..366),
2322 ParsedMarkdownTableAlignment::None
2323 ),
2324 column(
2325 1,
2326 1,
2327 false,
2328 text("Dennis", 0..366),
2329 ParsedMarkdownTableAlignment::None
2330 )
2331 ]),
2332 ],
2333 ))],
2334 },
2335 parsed
2336 );
2337 }
2338
2339 #[gpui::test]
2340 async fn test_html_table_with_caption() {
2341 let parsed = parse(
2342 "<table>
2343 <caption>My Table</caption>
2344 <tbody>
2345 <tr>
2346 <td>1</td>
2347 <td>Chris</td>
2348 </tr>
2349 <tr>
2350 <td>2</td>
2351 <td>Dennis</td>
2352 </tr>
2353 </tbody>
2354 </table>",
2355 )
2356 .await;
2357
2358 assert_eq!(
2359 ParsedMarkdown {
2360 children: vec![ParsedMarkdownElement::Table(table(
2361 0..280,
2362 Some(vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2363 source_range: 0..280,
2364 contents: "My Table".into(),
2365 highlights: Default::default(),
2366 regions: Default::default()
2367 })]),
2368 vec![],
2369 vec![
2370 row(vec![
2371 column(
2372 1,
2373 1,
2374 false,
2375 text("1", 0..280),
2376 ParsedMarkdownTableAlignment::None
2377 ),
2378 column(
2379 1,
2380 1,
2381 false,
2382 text("Chris", 0..280),
2383 ParsedMarkdownTableAlignment::None
2384 )
2385 ]),
2386 row(vec![
2387 column(
2388 1,
2389 1,
2390 false,
2391 text("2", 0..280),
2392 ParsedMarkdownTableAlignment::None
2393 ),
2394 column(
2395 1,
2396 1,
2397 false,
2398 text("Dennis", 0..280),
2399 ParsedMarkdownTableAlignment::None
2400 )
2401 ]),
2402 ],
2403 ))],
2404 },
2405 parsed
2406 );
2407 }
2408
2409 #[gpui::test]
2410 async fn test_html_table_without_headings() {
2411 let parsed = parse(
2412 "<table>
2413 <tbody>
2414 <tr>
2415 <td>1</td>
2416 <td>Chris</td>
2417 </tr>
2418 <tr>
2419 <td>2</td>
2420 <td>Dennis</td>
2421 </tr>
2422 </tbody>
2423 </table>",
2424 )
2425 .await;
2426
2427 assert_eq!(
2428 ParsedMarkdown {
2429 children: vec![ParsedMarkdownElement::Table(table(
2430 0..240,
2431 None,
2432 vec![],
2433 vec![
2434 row(vec![
2435 column(
2436 1,
2437 1,
2438 false,
2439 text("1", 0..240),
2440 ParsedMarkdownTableAlignment::None
2441 ),
2442 column(
2443 1,
2444 1,
2445 false,
2446 text("Chris", 0..240),
2447 ParsedMarkdownTableAlignment::None
2448 )
2449 ]),
2450 row(vec![
2451 column(
2452 1,
2453 1,
2454 false,
2455 text("2", 0..240),
2456 ParsedMarkdownTableAlignment::None
2457 ),
2458 column(
2459 1,
2460 1,
2461 false,
2462 text("Dennis", 0..240),
2463 ParsedMarkdownTableAlignment::None
2464 )
2465 ]),
2466 ],
2467 ))],
2468 },
2469 parsed
2470 );
2471 }
2472
2473 #[gpui::test]
2474 async fn test_html_table_without_body() {
2475 let parsed = parse(
2476 "<table>
2477 <thead>
2478 <tr>
2479 <th>Id</th>
2480 <th>Name</th>
2481 </tr>
2482 </thead>
2483 </table>",
2484 )
2485 .await;
2486
2487 assert_eq!(
2488 ParsedMarkdown {
2489 children: vec![ParsedMarkdownElement::Table(table(
2490 0..150,
2491 None,
2492 vec![row(vec![
2493 column(
2494 1,
2495 1,
2496 true,
2497 text("Id", 0..150),
2498 ParsedMarkdownTableAlignment::Center
2499 ),
2500 column(
2501 1,
2502 1,
2503 true,
2504 text("Name", 0..150),
2505 ParsedMarkdownTableAlignment::Center
2506 )
2507 ])],
2508 vec![],
2509 ))],
2510 },
2511 parsed
2512 );
2513 }
2514
2515 #[gpui::test]
2516 async fn test_html_heading_tags() {
2517 let parsed = parse("<h1>Heading</h1><h2>Heading</h2><h3>Heading</h3><h4>Heading</h4><h5>Heading</h5><h6>Heading</h6>").await;
2518
2519 assert_eq!(
2520 ParsedMarkdown {
2521 children: vec![
2522 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2523 level: HeadingLevel::H1,
2524 source_range: 0..96,
2525 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2526 source_range: 0..96,
2527 contents: "Heading".into(),
2528 highlights: Vec::default(),
2529 regions: Vec::default()
2530 })],
2531 }),
2532 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2533 level: HeadingLevel::H2,
2534 source_range: 0..96,
2535 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2536 source_range: 0..96,
2537 contents: "Heading".into(),
2538 highlights: Vec::default(),
2539 regions: Vec::default()
2540 })],
2541 }),
2542 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2543 level: HeadingLevel::H3,
2544 source_range: 0..96,
2545 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2546 source_range: 0..96,
2547 contents: "Heading".into(),
2548 highlights: Vec::default(),
2549 regions: Vec::default()
2550 })],
2551 }),
2552 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2553 level: HeadingLevel::H4,
2554 source_range: 0..96,
2555 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2556 source_range: 0..96,
2557 contents: "Heading".into(),
2558 highlights: Vec::default(),
2559 regions: Vec::default()
2560 })],
2561 }),
2562 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2563 level: HeadingLevel::H5,
2564 source_range: 0..96,
2565 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2566 source_range: 0..96,
2567 contents: "Heading".into(),
2568 highlights: Vec::default(),
2569 regions: Vec::default()
2570 })],
2571 }),
2572 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2573 level: HeadingLevel::H6,
2574 source_range: 0..96,
2575 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2576 source_range: 0..96,
2577 contents: "Heading".into(),
2578 highlights: Vec::default(),
2579 regions: Vec::default()
2580 })],
2581 }),
2582 ],
2583 },
2584 parsed
2585 );
2586 }
2587
2588 #[gpui::test]
2589 async fn test_html_image_tag() {
2590 let parsed = parse("<img src=\"http://example.com/foo.png\" />").await;
2591
2592 assert_eq!(
2593 ParsedMarkdown {
2594 children: vec![ParsedMarkdownElement::Image(Image {
2595 source_range: 0..40,
2596 link: Link::Web {
2597 url: "http://example.com/foo.png".to_string(),
2598 },
2599 alt_text: None,
2600 height: None,
2601 width: None,
2602 })]
2603 },
2604 parsed
2605 );
2606 }
2607
2608 #[gpui::test]
2609 async fn test_html_image_tag_with_alt_text() {
2610 let parsed = parse("<img src=\"http://example.com/foo.png\" alt=\"Foo\" />").await;
2611
2612 assert_eq!(
2613 ParsedMarkdown {
2614 children: vec![ParsedMarkdownElement::Image(Image {
2615 source_range: 0..50,
2616 link: Link::Web {
2617 url: "http://example.com/foo.png".to_string(),
2618 },
2619 alt_text: Some("Foo".into()),
2620 height: None,
2621 width: None,
2622 })]
2623 },
2624 parsed
2625 );
2626 }
2627
2628 #[gpui::test]
2629 async fn test_html_image_tag_with_height_and_width() {
2630 let parsed =
2631 parse("<img src=\"http://example.com/foo.png\" height=\"100\" width=\"200\" />").await;
2632
2633 assert_eq!(
2634 ParsedMarkdown {
2635 children: vec![ParsedMarkdownElement::Image(Image {
2636 source_range: 0..65,
2637 link: Link::Web {
2638 url: "http://example.com/foo.png".to_string(),
2639 },
2640 alt_text: None,
2641 height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
2642 width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
2643 })]
2644 },
2645 parsed
2646 );
2647 }
2648
2649 #[gpui::test]
2650 async fn test_html_image_style_tag_with_height_and_width() {
2651 let parsed = parse(
2652 "<img src=\"http://example.com/foo.png\" style=\"height:100px; width:200px;\" />",
2653 )
2654 .await;
2655
2656 assert_eq!(
2657 ParsedMarkdown {
2658 children: vec![ParsedMarkdownElement::Image(Image {
2659 source_range: 0..75,
2660 link: Link::Web {
2661 url: "http://example.com/foo.png".to_string(),
2662 },
2663 alt_text: None,
2664 height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
2665 width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
2666 })]
2667 },
2668 parsed
2669 );
2670 }
2671
2672 #[gpui::test]
2673 async fn test_header_only_table() {
2674 let markdown = "\
2675| Header 1 | Header 2 |
2676|----------|----------|
2677
2678Some other content
2679";
2680
2681 let expected_table = table(
2682 0..48,
2683 None,
2684 vec![row(vec![
2685 column(
2686 1,
2687 1,
2688 true,
2689 text("Header 1", 1..11),
2690 ParsedMarkdownTableAlignment::None,
2691 ),
2692 column(
2693 1,
2694 1,
2695 true,
2696 text("Header 2", 12..22),
2697 ParsedMarkdownTableAlignment::None,
2698 ),
2699 ])],
2700 vec![],
2701 );
2702
2703 assert_eq!(
2704 parse(markdown).await.children[0],
2705 ParsedMarkdownElement::Table(expected_table)
2706 );
2707 }
2708
2709 #[gpui::test]
2710 async fn test_basic_table() {
2711 let markdown = "\
2712| Header 1 | Header 2 |
2713|----------|----------|
2714| Cell 1 | Cell 2 |
2715| Cell 3 | Cell 4 |";
2716
2717 let expected_table = table(
2718 0..95,
2719 None,
2720 vec![row(vec![
2721 column(
2722 1,
2723 1,
2724 true,
2725 text("Header 1", 1..11),
2726 ParsedMarkdownTableAlignment::None,
2727 ),
2728 column(
2729 1,
2730 1,
2731 true,
2732 text("Header 2", 12..22),
2733 ParsedMarkdownTableAlignment::None,
2734 ),
2735 ])],
2736 vec![
2737 row(vec![
2738 column(
2739 1,
2740 1,
2741 false,
2742 text("Cell 1", 49..59),
2743 ParsedMarkdownTableAlignment::None,
2744 ),
2745 column(
2746 1,
2747 1,
2748 false,
2749 text("Cell 2", 60..70),
2750 ParsedMarkdownTableAlignment::None,
2751 ),
2752 ]),
2753 row(vec![
2754 column(
2755 1,
2756 1,
2757 false,
2758 text("Cell 3", 73..83),
2759 ParsedMarkdownTableAlignment::None,
2760 ),
2761 column(
2762 1,
2763 1,
2764 false,
2765 text("Cell 4", 84..94),
2766 ParsedMarkdownTableAlignment::None,
2767 ),
2768 ]),
2769 ],
2770 );
2771
2772 assert_eq!(
2773 parse(markdown).await.children[0],
2774 ParsedMarkdownElement::Table(expected_table)
2775 );
2776 }
2777
2778 #[gpui::test]
2779 async fn test_list_basic() {
2780 let parsed = parse(
2781 "\
2782* Item 1
2783* Item 2
2784* Item 3
2785",
2786 )
2787 .await;
2788
2789 assert_eq!(
2790 parsed.children,
2791 vec![
2792 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
2793 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
2794 list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
2795 ],
2796 );
2797 }
2798
2799 #[gpui::test]
2800 async fn test_list_with_tasks() {
2801 let parsed = parse(
2802 "\
2803- [ ] TODO
2804- [x] Checked
2805",
2806 )
2807 .await;
2808
2809 assert_eq!(
2810 parsed.children,
2811 vec![
2812 list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
2813 list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
2814 ],
2815 );
2816 }
2817
2818 #[gpui::test]
2819 async fn test_list_with_indented_task() {
2820 let parsed = parse(
2821 "\
2822- [ ] TODO
2823 - [x] Checked
2824 - Unordered
2825 1. Number 1
2826 1. Number 2
28271. Number A
2828",
2829 )
2830 .await;
2831
2832 assert_eq!(
2833 parsed.children,
2834 vec![
2835 list_item(0..12, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
2836 list_item(13..26, 2, Task(true, 15..18), vec![p("Checked", 19..26)]),
2837 list_item(29..40, 2, Unordered, vec![p("Unordered", 31..40)]),
2838 list_item(43..54, 2, Ordered(1), vec![p("Number 1", 46..54)]),
2839 list_item(57..68, 2, Ordered(2), vec![p("Number 2", 60..68)]),
2840 list_item(69..80, 1, Ordered(1), vec![p("Number A", 72..80)]),
2841 ],
2842 );
2843 }
2844
2845 #[gpui::test]
2846 async fn test_list_with_linebreak_is_handled_correctly() {
2847 let parsed = parse(
2848 "\
2849- [ ] Task 1
2850
2851- [x] Task 2
2852",
2853 )
2854 .await;
2855
2856 assert_eq!(
2857 parsed.children,
2858 vec![
2859 list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
2860 list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
2861 ],
2862 );
2863 }
2864
2865 #[gpui::test]
2866 async fn test_list_nested() {
2867 let parsed = parse(
2868 "\
2869* Item 1
2870* Item 2
2871* Item 3
2872
28731. Hello
28741. Two
2875 1. Three
28762. Four
28773. Five
2878
2879* First
2880 1. Hello
2881 1. Goodbyte
2882 - Inner
2883 - Inner
2884 2. Goodbyte
2885 - Next item empty
2886 -
2887* Last
2888",
2889 )
2890 .await;
2891
2892 assert_eq!(
2893 parsed.children,
2894 vec![
2895 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
2896 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
2897 list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
2898 list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
2899 list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
2900 list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
2901 list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
2902 list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
2903 list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
2904 list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
2905 list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
2906 list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
2907 list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
2908 list_item(143..159, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
2909 list_item(160..180, 3, Unordered, vec![p("Next item empty", 165..180)]),
2910 list_item(186..190, 3, Unordered, vec![]),
2911 list_item(191..197, 1, Unordered, vec![p("Last", 193..197)]),
2912 ]
2913 );
2914 }
2915
2916 #[gpui::test]
2917 async fn test_list_with_nested_content() {
2918 let parsed = parse(
2919 "\
2920* This is a list item with two paragraphs.
2921
2922 This is the second paragraph in the list item.
2923",
2924 )
2925 .await;
2926
2927 assert_eq!(
2928 parsed.children,
2929 vec![list_item(
2930 0..96,
2931 1,
2932 Unordered,
2933 vec![
2934 p("This is a list item with two paragraphs.", 4..44),
2935 p("This is the second paragraph in the list item.", 50..97)
2936 ],
2937 ),],
2938 );
2939 }
2940
2941 #[gpui::test]
2942 async fn test_list_item_with_inline_html() {
2943 let parsed = parse(
2944 "\
2945* This is a list item with an inline HTML <sometag>tag</sometag>.
2946",
2947 )
2948 .await;
2949
2950 assert_eq!(
2951 parsed.children,
2952 vec![list_item(
2953 0..67,
2954 1,
2955 Unordered,
2956 vec![p("This is a list item with an inline HTML tag.", 4..44),],
2957 ),],
2958 );
2959 }
2960
2961 #[gpui::test]
2962 async fn test_nested_list_with_paragraph_inside() {
2963 let parsed = parse(
2964 "\
29651. a
2966 1. b
2967 1. c
2968
2969 text
2970
2971 1. d
2972",
2973 )
2974 .await;
2975
2976 assert_eq!(
2977 parsed.children,
2978 vec![
2979 list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
2980 list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
2981 list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
2982 p("text", 32..37),
2983 list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
2984 ],
2985 );
2986 }
2987
2988 #[gpui::test]
2989 async fn test_list_with_leading_text() {
2990 let parsed = parse(
2991 "\
2992* `code`
2993* **bold**
2994* [link](https://example.com)
2995",
2996 )
2997 .await;
2998
2999 assert_eq!(
3000 parsed.children,
3001 vec![
3002 list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
3003 list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
3004 list_item(20..49, 1, Unordered, vec![p("link", 22..49)],),
3005 ],
3006 );
3007 }
3008
3009 #[gpui::test]
3010 async fn test_simple_block_quote() {
3011 let parsed = parse("> Simple block quote with **styled text**").await;
3012
3013 assert_eq!(
3014 parsed.children,
3015 vec![block_quote(
3016 vec![p("Simple block quote with styled text", 2..41)],
3017 0..41
3018 )]
3019 );
3020 }
3021
3022 #[gpui::test]
3023 async fn test_simple_block_quote_with_multiple_lines() {
3024 let parsed = parse(
3025 "\
3026> # Heading
3027> More
3028> text
3029>
3030> More text
3031",
3032 )
3033 .await;
3034
3035 assert_eq!(
3036 parsed.children,
3037 vec![block_quote(
3038 vec![
3039 h1(text("Heading", 4..11), 2..12),
3040 p("More text", 14..26),
3041 p("More text", 30..40)
3042 ],
3043 0..40
3044 )]
3045 );
3046 }
3047
3048 #[gpui::test]
3049 async fn test_nested_block_quote() {
3050 let parsed = parse(
3051 "\
3052> A
3053>
3054> > # B
3055>
3056> C
3057
3058More text
3059",
3060 )
3061 .await;
3062
3063 assert_eq!(
3064 parsed.children,
3065 vec![
3066 block_quote(
3067 vec![
3068 p("A", 2..4),
3069 block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
3070 p("C", 18..20)
3071 ],
3072 0..20
3073 ),
3074 p("More text", 21..31)
3075 ]
3076 );
3077 }
3078
3079 #[gpui::test]
3080 async fn test_code_block() {
3081 let parsed = parse(
3082 "\
3083```
3084fn main() {
3085 return 0;
3086}
3087```
3088",
3089 )
3090 .await;
3091
3092 assert_eq!(
3093 parsed.children,
3094 vec![code_block(
3095 None,
3096 "fn main() {\n return 0;\n}",
3097 0..35,
3098 None
3099 )]
3100 );
3101 }
3102
3103 #[gpui::test]
3104 async fn test_code_block_with_language(executor: BackgroundExecutor) {
3105 let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
3106 language_registry.add(language::rust_lang());
3107
3108 let parsed = parse_markdown(
3109 "\
3110```rust
3111fn main() {
3112 return 0;
3113}
3114```
3115",
3116 None,
3117 Some(language_registry),
3118 )
3119 .await;
3120
3121 assert_eq!(
3122 parsed.children,
3123 vec![code_block(
3124 Some("rust".to_string()),
3125 "fn main() {\n return 0;\n}",
3126 0..39,
3127 Some(vec![])
3128 )]
3129 );
3130 }
3131
3132 fn h1(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
3133 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
3134 source_range,
3135 level: HeadingLevel::H1,
3136 contents,
3137 })
3138 }
3139
3140 fn h2(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
3141 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
3142 source_range,
3143 level: HeadingLevel::H2,
3144 contents,
3145 })
3146 }
3147
3148 fn h3(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
3149 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
3150 source_range,
3151 level: HeadingLevel::H3,
3152 contents,
3153 })
3154 }
3155
3156 fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
3157 ParsedMarkdownElement::Paragraph(text(contents, source_range))
3158 }
3159
3160 fn text(contents: &str, source_range: Range<usize>) -> MarkdownParagraph {
3161 vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
3162 highlights: Vec::new(),
3163 regions: Vec::new(),
3164 source_range,
3165 contents: contents.to_string().into(),
3166 })]
3167 }
3168
3169 fn block_quote(
3170 children: Vec<ParsedMarkdownElement>,
3171 source_range: Range<usize>,
3172 ) -> ParsedMarkdownElement {
3173 ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
3174 source_range,
3175 children,
3176 })
3177 }
3178
3179 fn code_block(
3180 language: Option<String>,
3181 code: &str,
3182 source_range: Range<usize>,
3183 highlights: Option<Vec<(Range<usize>, HighlightId)>>,
3184 ) -> ParsedMarkdownElement {
3185 ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
3186 source_range,
3187 language,
3188 contents: code.to_string().into(),
3189 highlights,
3190 })
3191 }
3192
3193 fn list_item(
3194 source_range: Range<usize>,
3195 depth: u16,
3196 item_type: ParsedMarkdownListItemType,
3197 content: Vec<ParsedMarkdownElement>,
3198 ) -> ParsedMarkdownElement {
3199 ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
3200 source_range,
3201 item_type,
3202 depth,
3203 content,
3204 nested: false,
3205 })
3206 }
3207
3208 fn nested_list_item(
3209 source_range: Range<usize>,
3210 depth: u16,
3211 item_type: ParsedMarkdownListItemType,
3212 content: Vec<ParsedMarkdownElement>,
3213 ) -> ParsedMarkdownElement {
3214 ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
3215 source_range,
3216 item_type,
3217 depth,
3218 content,
3219 nested: true,
3220 })
3221 }
3222
3223 fn table(
3224 source_range: Range<usize>,
3225 caption: Option<MarkdownParagraph>,
3226 header: Vec<ParsedMarkdownTableRow>,
3227 body: Vec<ParsedMarkdownTableRow>,
3228 ) -> ParsedMarkdownTable {
3229 ParsedMarkdownTable {
3230 source_range,
3231 header,
3232 body,
3233 caption,
3234 }
3235 }
3236
3237 fn row(columns: Vec<ParsedMarkdownTableColumn>) -> ParsedMarkdownTableRow {
3238 ParsedMarkdownTableRow { columns }
3239 }
3240
3241 fn column(
3242 col_span: usize,
3243 row_span: usize,
3244 is_header: bool,
3245 children: MarkdownParagraph,
3246 alignment: ParsedMarkdownTableAlignment,
3247 ) -> ParsedMarkdownTableColumn {
3248 ParsedMarkdownTableColumn {
3249 col_span,
3250 row_span,
3251 is_header,
3252 children,
3253 alignment,
3254 }
3255 }
3256
3257 impl PartialEq for ParsedMarkdownTable {
3258 fn eq(&self, other: &Self) -> bool {
3259 self.source_range == other.source_range
3260 && self.header == other.header
3261 && self.body == other.body
3262 }
3263 }
3264
3265 impl PartialEq for ParsedMarkdownText {
3266 fn eq(&self, other: &Self) -> bool {
3267 self.source_range == other.source_range && self.contents == other.contents
3268 }
3269 }
3270}