1use crate::{
2 markdown_elements::*,
3 markdown_minifier::{Minifier, MinifierOptions},
4};
5use async_recursion::async_recursion;
6use collections::FxHashMap;
7use gpui::{DefiniteLength, FontWeight, px, relative};
8use html5ever::{ParseOpts, local_name, parse_document, tendril::TendrilSink};
9use language::LanguageRegistry;
10use markup5ever_rcdom::RcDom;
11use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
12use std::{
13 cell::RefCell, collections::HashMap, mem, ops::Range, path::PathBuf, rc::Rc, sync::Arc, vec,
14};
15use ui::SharedString;
16
17pub async fn parse_markdown(
18 markdown_input: &str,
19 file_location_directory: Option<PathBuf>,
20 language_registry: Option<Arc<LanguageRegistry>>,
21) -> ParsedMarkdown {
22 let mut options = Options::all();
23 options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
24
25 let parser = Parser::new_ext(markdown_input, options);
26 let parser = MarkdownParser::new(
27 parser.into_offset_iter().collect(),
28 file_location_directory,
29 language_registry,
30 );
31 let renderer = parser.parse_document().await;
32 ParsedMarkdown {
33 children: renderer.parsed,
34 }
35}
36
37fn cleanup_html(source: &str) -> Vec<u8> {
38 let mut writer = std::io::Cursor::new(Vec::new());
39 let mut reader = std::io::Cursor::new(source);
40 let mut minify = Minifier::new(
41 &mut writer,
42 MinifierOptions {
43 omit_doctype: true,
44 collapse_whitespace: true,
45 ..Default::default()
46 },
47 );
48 if let Ok(()) = minify.minify(&mut reader) {
49 writer.into_inner()
50 } else {
51 source.bytes().collect()
52 }
53}
54
/// Cursor-based parser over a pre-collected pulldown-cmark event stream.
struct MarkdownParser<'a> {
    /// Every event of the document, paired with the source range reported by
    /// the offset iterator
    tokens: Vec<(Event<'a>, Range<usize>)>,
    /// The current index in the tokens array
    cursor: usize,
    /// The blocks that we have successfully parsed so far
    parsed: Vec<ParsedMarkdownElement>,
    /// Handed to `Link::identify` / `Image::identify` when link and image
    /// destinations are resolved
    file_location_directory: Option<PathBuf>,
    /// Used to look up a language for highlighting fenced code blocks; when
    /// `None`, code blocks are left without highlights
    language_registry: Option<Arc<LanguageRegistry>>,
}
64
/// State threaded through `parse_html_node` / `consume_children` while walking
/// an HTML DOM.
#[derive(Debug)]
struct ParseHtmlNodeContext {
    /// Depth handed to `extract_html_list` when a `<ul>` or `<ol>` is encountered
    list_item_depth: u16,
}

impl Default for ParseHtmlNodeContext {
    /// Starts at list depth 1.
    fn default() -> Self {
        Self { list_item_depth: 1 }
    }
}
75
/// Accumulator for a list item while `parse_list` is still collecting its content.
struct MarkdownListItem {
    /// Elements collected for this item so far
    content: Vec<ParsedMarkdownElement>,
    /// Kind of the item (task, ordered or unordered)
    item_type: ParsedMarkdownListItemType,
}

impl Default for MarkdownListItem {
    /// An empty, unordered list item.
    fn default() -> Self {
        Self {
            content: Vec::new(),
            item_type: ParsedMarkdownListItemType::Unordered,
        }
    }
}
89
90impl<'a> MarkdownParser<'a> {
91 fn new(
92 tokens: Vec<(Event<'a>, Range<usize>)>,
93 file_location_directory: Option<PathBuf>,
94 language_registry: Option<Arc<LanguageRegistry>>,
95 ) -> Self {
96 Self {
97 tokens,
98 file_location_directory,
99 language_registry,
100 cursor: 0,
101 parsed: vec![],
102 }
103 }
104
105 fn eof(&self) -> bool {
106 if self.tokens.is_empty() {
107 return true;
108 }
109 self.cursor >= self.tokens.len() - 1
110 }
111
112 fn peek(&self, steps: usize) -> Option<&(Event<'_>, Range<usize>)> {
113 if self.eof() || (steps + self.cursor) >= self.tokens.len() {
114 return self.tokens.last();
115 }
116 self.tokens.get(self.cursor + steps)
117 }
118
119 fn previous(&self) -> Option<&(Event<'_>, Range<usize>)> {
120 if self.cursor == 0 || self.cursor > self.tokens.len() {
121 return None;
122 }
123 self.tokens.get(self.cursor - 1)
124 }
125
/// Returns the token under the cursor (shorthand for `peek(0)`), which falls
/// back to the last token once the parser is at end-of-input.
fn current(&self) -> Option<&(Event<'_>, Range<usize>)> {
    self.peek(0)
}
129
130 fn current_event(&self) -> Option<&Event<'_>> {
131 self.current().map(|(event, _)| event)
132 }
133
134 fn is_text_like(event: &Event) -> bool {
135 match event {
136 Event::Text(_)
137 // Represent an inline code block
138 | Event::Code(_)
139 | Event::Html(_)
140 | Event::InlineHtml(_)
141 | Event::FootnoteReference(_)
142 | Event::Start(Tag::Link { .. })
143 | Event::Start(Tag::Emphasis)
144 | Event::Start(Tag::Strong)
145 | Event::Start(Tag::Strikethrough)
146 | Event::Start(Tag::Image { .. }) => {
147 true
148 }
149 _ => false,
150 }
151 }
152
153 async fn parse_document(mut self) -> Self {
154 while !self.eof() {
155 if let Some(block) = self.parse_block().await {
156 self.parsed.extend(block);
157 } else {
158 self.cursor += 1;
159 }
160 }
161 self
162 }
163
164 #[async_recursion]
165 async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
166 let (current, source_range) = self.current().unwrap();
167 let source_range = source_range.clone();
168 match current {
169 Event::Start(tag) => match tag {
170 Tag::Paragraph => {
171 self.cursor += 1;
172 let text = self.parse_text(false, Some(source_range));
173 Some(vec![ParsedMarkdownElement::Paragraph(text)])
174 }
175 Tag::Heading { level, .. } => {
176 let level = *level;
177 self.cursor += 1;
178 let heading = self.parse_heading(level);
179 Some(vec![ParsedMarkdownElement::Heading(heading)])
180 }
181 Tag::Table(alignment) => {
182 let alignment = alignment.clone();
183 self.cursor += 1;
184 let table = self.parse_table(alignment);
185 Some(vec![ParsedMarkdownElement::Table(table)])
186 }
187 Tag::List(order) => {
188 let order = *order;
189 self.cursor += 1;
190 let list = self.parse_list(order).await;
191 Some(list)
192 }
193 Tag::BlockQuote(_kind) => {
194 self.cursor += 1;
195 let block_quote = self.parse_block_quote().await;
196 Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
197 }
198 Tag::CodeBlock(kind) => {
199 let language = match kind {
200 pulldown_cmark::CodeBlockKind::Indented => None,
201 pulldown_cmark::CodeBlockKind::Fenced(language) => {
202 if language.is_empty() {
203 None
204 } else {
205 Some(language.to_string())
206 }
207 }
208 };
209
210 self.cursor += 1;
211
212 let code_block = self.parse_code_block(language).await?;
213 Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
214 }
215 Tag::HtmlBlock => {
216 self.cursor += 1;
217
218 Some(self.parse_html_block().await)
219 }
220 _ => None,
221 },
222 Event::Rule => {
223 self.cursor += 1;
224 Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
225 }
226 _ => None,
227 }
228 }
229
230 fn parse_text(
231 &mut self,
232 should_complete_on_soft_break: bool,
233 source_range: Option<Range<usize>>,
234 ) -> MarkdownParagraph {
235 let source_range = source_range.unwrap_or_else(|| {
236 self.current()
237 .map(|(_, range)| range.clone())
238 .unwrap_or_default()
239 });
240
241 let mut markdown_text_like = Vec::new();
242 let mut text = String::new();
243 let mut bold_depth = 0;
244 let mut italic_depth = 0;
245 let mut strikethrough_depth = 0;
246 let mut link: Option<Link> = None;
247 let mut image: Option<Image> = None;
248 let mut region_ranges: Vec<Range<usize>> = vec![];
249 let mut regions: Vec<ParsedRegion> = vec![];
250 let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
251 let mut link_urls: Vec<String> = vec![];
252 let mut link_ranges: Vec<Range<usize>> = vec![];
253
254 loop {
255 if self.eof() {
256 break;
257 }
258
259 let (current, _) = self.current().unwrap();
260 let prev_len = text.len();
261 match current {
262 Event::SoftBreak => {
263 if should_complete_on_soft_break {
264 break;
265 }
266 text.push(' ');
267 }
268
269 Event::HardBreak => {
270 text.push('\n');
271 }
272
273 // We want to ignore any inline HTML tags in the text but keep
274 // the text between them
275 Event::InlineHtml(_) => {}
276
277 Event::Text(t) => {
278 text.push_str(t.as_ref());
279 let mut style = MarkdownHighlightStyle::default();
280
281 if bold_depth > 0 {
282 style.weight = FontWeight::BOLD;
283 }
284
285 if italic_depth > 0 {
286 style.italic = true;
287 }
288
289 if strikethrough_depth > 0 {
290 style.strikethrough = true;
291 }
292
293 let last_run_len = if let Some(link) = link.clone() {
294 region_ranges.push(prev_len..text.len());
295 regions.push(ParsedRegion {
296 code: false,
297 link: Some(link),
298 });
299 style.link = true;
300 prev_len
301 } else {
302 // Manually scan for links
303 let mut finder = linkify::LinkFinder::new();
304 finder.kinds(&[linkify::LinkKind::Url]);
305 let mut last_link_len = prev_len;
306 for link in finder.links(t) {
307 let start = prev_len + link.start();
308 let end = prev_len + link.end();
309 let range = start..end;
310 link_ranges.push(range.clone());
311 link_urls.push(link.as_str().to_string());
312
313 // If there is a style before we match a link, we have to add this to the highlighted ranges
314 if style != MarkdownHighlightStyle::default() && last_link_len < start {
315 highlights.push((
316 last_link_len..start,
317 MarkdownHighlight::Style(style.clone()),
318 ));
319 }
320
321 highlights.push((
322 range.clone(),
323 MarkdownHighlight::Style(MarkdownHighlightStyle {
324 underline: true,
325 ..style
326 }),
327 ));
328 region_ranges.push(range.clone());
329 regions.push(ParsedRegion {
330 code: false,
331 link: Some(Link::Web {
332 url: link.as_str().to_string(),
333 }),
334 });
335 last_link_len = end;
336 }
337 last_link_len
338 };
339
340 if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
341 let mut new_highlight = true;
342 if let Some((last_range, last_style)) = highlights.last_mut()
343 && last_range.end == last_run_len
344 && last_style == &MarkdownHighlight::Style(style.clone())
345 {
346 last_range.end = text.len();
347 new_highlight = false;
348 }
349 if new_highlight {
350 highlights.push((
351 last_run_len..text.len(),
352 MarkdownHighlight::Style(style.clone()),
353 ));
354 }
355 }
356 }
357 Event::Code(t) => {
358 text.push_str(t.as_ref());
359 region_ranges.push(prev_len..text.len());
360
361 if link.is_some() {
362 highlights.push((
363 prev_len..text.len(),
364 MarkdownHighlight::Style(MarkdownHighlightStyle {
365 link: true,
366 ..Default::default()
367 }),
368 ));
369 }
370 regions.push(ParsedRegion {
371 code: true,
372 link: link.clone(),
373 });
374 }
375 Event::Start(tag) => match tag {
376 Tag::Emphasis => italic_depth += 1,
377 Tag::Strong => bold_depth += 1,
378 Tag::Strikethrough => strikethrough_depth += 1,
379 Tag::Link { dest_url, .. } => {
380 link = Link::identify(
381 self.file_location_directory.clone(),
382 dest_url.to_string(),
383 );
384 }
385 Tag::Image { dest_url, .. } => {
386 if !text.is_empty() {
387 let parsed_regions = MarkdownParagraphChunk::Text(ParsedMarkdownText {
388 source_range: source_range.clone(),
389 contents: mem::take(&mut text).into(),
390 highlights: mem::take(&mut highlights),
391 region_ranges: mem::take(&mut region_ranges),
392 regions: mem::take(&mut regions),
393 });
394 markdown_text_like.push(parsed_regions);
395 }
396 image = Image::identify(
397 dest_url.to_string(),
398 source_range.clone(),
399 self.file_location_directory.clone(),
400 );
401 }
402 _ => {
403 break;
404 }
405 },
406
407 Event::End(tag) => match tag {
408 TagEnd::Emphasis => italic_depth -= 1,
409 TagEnd::Strong => bold_depth -= 1,
410 TagEnd::Strikethrough => strikethrough_depth -= 1,
411 TagEnd::Link => {
412 link = None;
413 }
414 TagEnd::Image => {
415 if let Some(mut image) = image.take() {
416 if !text.is_empty() {
417 image.set_alt_text(std::mem::take(&mut text).into());
418 mem::take(&mut highlights);
419 mem::take(&mut region_ranges);
420 mem::take(&mut regions);
421 }
422 markdown_text_like.push(MarkdownParagraphChunk::Image(image));
423 }
424 }
425 TagEnd::Paragraph => {
426 self.cursor += 1;
427 break;
428 }
429 _ => {
430 break;
431 }
432 },
433 _ => {
434 break;
435 }
436 }
437
438 self.cursor += 1;
439 }
440 if !text.is_empty() {
441 markdown_text_like.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
442 source_range,
443 contents: text.into(),
444 highlights,
445 regions,
446 region_ranges,
447 }));
448 }
449 markdown_text_like
450 }
451
452 fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
453 let (_event, source_range) = self.previous().unwrap();
454 let source_range = source_range.clone();
455 let text = self.parse_text(true, None);
456
457 // Advance past the heading end tag
458 self.cursor += 1;
459
460 ParsedMarkdownHeading {
461 source_range,
462 level: match level {
463 pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
464 pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
465 pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
466 pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
467 pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
468 pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
469 },
470 contents: text,
471 }
472 }
473
474 fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
475 let (_event, source_range) = self.previous().unwrap();
476 let source_range = source_range.clone();
477 let mut header = vec![];
478 let mut body = vec![];
479 let mut row_columns = vec![];
480 let mut in_header = true;
481 let column_alignments = alignment
482 .iter()
483 .map(Self::convert_alignment)
484 .collect::<Vec<_>>();
485
486 loop {
487 if self.eof() {
488 break;
489 }
490
491 let (current, source_range) = self.current().unwrap();
492 let source_range = source_range.clone();
493 match current {
494 Event::Start(Tag::TableHead)
495 | Event::Start(Tag::TableRow)
496 | Event::End(TagEnd::TableCell) => {
497 self.cursor += 1;
498 }
499 Event::Start(Tag::TableCell) => {
500 self.cursor += 1;
501 let cell_contents = self.parse_text(false, Some(source_range));
502 row_columns.push(ParsedMarkdownTableColumn {
503 col_span: 1,
504 row_span: 1,
505 is_header: in_header,
506 children: cell_contents,
507 alignment: column_alignments
508 .get(row_columns.len())
509 .copied()
510 .unwrap_or_default(),
511 });
512 }
513 Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
514 self.cursor += 1;
515 let columns = std::mem::take(&mut row_columns);
516 if in_header {
517 header.push(ParsedMarkdownTableRow { columns: columns });
518 in_header = false;
519 } else {
520 body.push(ParsedMarkdownTableRow::with_columns(columns));
521 }
522 }
523 Event::End(TagEnd::Table) => {
524 self.cursor += 1;
525 break;
526 }
527 _ => {
528 break;
529 }
530 }
531 }
532
533 ParsedMarkdownTable {
534 source_range,
535 header,
536 body,
537 caption: None,
538 }
539 }
540
541 fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
542 match alignment {
543 Alignment::None => ParsedMarkdownTableAlignment::None,
544 Alignment::Left => ParsedMarkdownTableAlignment::Left,
545 Alignment::Center => ParsedMarkdownTableAlignment::Center,
546 Alignment::Right => ParsedMarkdownTableAlignment::Right,
547 }
548 }
549
/// Parses a list whose start tag has just been consumed, including any nested
/// lists, into a flat sequence of elements.
///
/// Every list item becomes a `ParsedMarkdownElement::ListItem` carrying its
/// nesting `depth`; nesting is not represented structurally (`nested` is always
/// `false`). `order` is the starting number for ordered lists and `None` for
/// unordered ones; it is incremented after each item and saved/restored around
/// nested lists.
///
/// Panics when there is no previous token.
async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
    let (_, list_source_range) = self.previous().unwrap();

    // Finished items, in output order
    let mut items = Vec::new();
    // Items that have been started but not yet closed (innermost last)
    let mut items_stack = vec![MarkdownListItem::default()];
    let mut depth = 1;
    let mut order = order;
    // Numbering of the enclosing lists while a nested list is being parsed
    let mut order_stack = Vec::new();

    // Per depth: the position in `items` where the parent item must be inserted
    // once it closes, so that it precedes its already-emitted nested items
    let mut insertion_indices = FxHashMap::default();
    // Per depth: the source range of an item that was cut short by a nested list
    let mut source_ranges = FxHashMap::default();
    let mut start_item_range = list_source_range.clone();

    while !self.eof() {
        let (current, source_range) = self.current().unwrap();
        match current {
            Event::Start(Tag::List(new_order)) => {
                if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
                    insertion_indices.insert(depth, items.len());
                }

                // We will use the start of the nested list as the end for the current item's range,
                // because we don't care about the hierarchy of list items
                if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
                    e.insert(start_item_range.start..source_range.start);
                }

                order_stack.push(order);
                order = *new_order;
                self.cursor += 1;
                depth += 1;
            }
            Event::End(TagEnd::List(_)) => {
                // Restore the numbering of the enclosing list (if any)
                order = order_stack.pop().flatten();
                self.cursor += 1;
                depth -= 1;

                // The outermost list has been closed
                if depth == 0 {
                    break;
                }
            }
            Event::Start(Tag::Item) => {
                start_item_range = source_range.clone();

                self.cursor += 1;
                items_stack.push(MarkdownListItem::default());

                let mut task_list = None;
                // Check for task list marker (`- [ ]` or `- [x]`)
                if let Some(event) = self.current_event() {
                    // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
                    if event == &Event::Start(Tag::Paragraph) {
                        self.cursor += 1;
                    }

                    if let Some((Event::TaskListMarker(checked), range)) = self.current() {
                        task_list = Some((*checked, range.clone()));
                        self.cursor += 1;
                    }
                }

                if let Some((event, range)) = self.current() {
                    // This is a plain list item.
                    // For example `- some text` or `1. [Docs](./docs.md)`
                    if MarkdownParser::is_text_like(event) {
                        let text = self.parse_text(false, Some(range.clone()));
                        let block = ParsedMarkdownElement::Paragraph(text);
                        if let Some(content) = items_stack.last_mut() {
                            // A task marker takes precedence over ordered/unordered numbering
                            let item_type = if let Some((checked, range)) = task_list {
                                ParsedMarkdownListItemType::Task(checked, range)
                            } else if let Some(order) = order {
                                ParsedMarkdownListItemType::Ordered(order)
                            } else {
                                ParsedMarkdownListItemType::Unordered
                            };
                            content.item_type = item_type;
                            content.content.push(block);
                        }
                    } else {
                        // The item starts with a block (or something unrecognised);
                        // the item type is left at its default in this case
                        let block = self.parse_block().await;
                        if let Some(block) = block
                            && let Some(list_item) = items_stack.last_mut()
                        {
                            list_item.content.extend(block);
                        }
                    }
                }

                // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
                if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
                    self.cursor += 1;
                }
            }
            Event::End(TagEnd::Item) => {
                self.cursor += 1;

                if let Some(current) = order {
                    order = Some(current + 1);
                }

                if let Some(list_item) = items_stack.pop() {
                    // Prefer the range recorded when a nested list started;
                    // otherwise use the range of the item's start tag
                    let source_range = source_ranges
                        .remove(&depth)
                        .unwrap_or(start_item_range.clone());

                    // We need to remove the last character of the source range, because it includes the newline character
                    let source_range = source_range.start..source_range.end - 1;
                    let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
                        source_range,
                        content: list_item.content,
                        depth,
                        item_type: list_item.item_type,
                        nested: false,
                    });

                    // Place the item before the nested items that were emitted while it was open
                    if let Some(index) = insertion_indices.get(&depth) {
                        items.insert(*index, item);
                        insertion_indices.remove(&depth);
                    } else {
                        items.push(item);
                    }
                }
            }
            _ => {
                if depth == 0 {
                    break;
                }
                // This can only happen if a list item starts with more than one paragraph,
                // or the list item contains blocks that should be rendered after the nested list items
                let block = self.parse_block().await;
                if let Some(block) = block {
                    if let Some(list_item) = items_stack.last_mut() {
                        // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
                        if !insertion_indices.contains_key(&depth) {
                            list_item.content.extend(block);
                            continue;
                        }
                    }

                    // Otherwise we need to insert the block after all the nested items
                    // that have been parsed so far
                    items.extend(block);
                } else {
                    // Not a block start: skip the token so the loop makes progress
                    self.cursor += 1;
                }
            }
        }
    }

    items
}
701
702 #[async_recursion]
703 async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
704 let (_event, source_range) = self.previous().unwrap();
705 let source_range = source_range.clone();
706 let mut nested_depth = 1;
707
708 let mut children: Vec<ParsedMarkdownElement> = vec![];
709
710 while !self.eof() {
711 let block = self.parse_block().await;
712
713 if let Some(block) = block {
714 children.extend(block);
715 } else {
716 break;
717 }
718
719 if self.eof() {
720 break;
721 }
722
723 let (current, _source_range) = self.current().unwrap();
724 match current {
725 // This is a nested block quote.
726 // Record that we're in a nested block quote and continue parsing.
727 // We don't need to advance the cursor since the next
728 // call to `parse_block` will handle it.
729 Event::Start(Tag::BlockQuote(_kind)) => {
730 nested_depth += 1;
731 }
732 Event::End(TagEnd::BlockQuote(_kind)) => {
733 nested_depth -= 1;
734 if nested_depth == 0 {
735 self.cursor += 1;
736 break;
737 }
738 }
739 _ => {}
740 };
741 }
742
743 ParsedMarkdownBlockQuote {
744 source_range,
745 children,
746 }
747 }
748
749 async fn parse_code_block(
750 &mut self,
751 language: Option<String>,
752 ) -> Option<ParsedMarkdownCodeBlock> {
753 let Some((_event, source_range)) = self.previous() else {
754 return None;
755 };
756
757 let source_range = source_range.clone();
758 let mut code = String::new();
759
760 while !self.eof() {
761 let Some((current, _source_range)) = self.current() else {
762 break;
763 };
764
765 match current {
766 Event::Text(text) => {
767 code.push_str(text);
768 self.cursor += 1;
769 }
770 Event::End(TagEnd::CodeBlock) => {
771 self.cursor += 1;
772 break;
773 }
774 _ => {
775 break;
776 }
777 }
778 }
779
780 code = code.strip_suffix('\n').unwrap_or(&code).to_string();
781
782 let highlights = if let Some(language) = &language {
783 if let Some(registry) = &self.language_registry {
784 let rope: language::Rope = code.as_str().into();
785 registry
786 .language_for_name_or_extension(language)
787 .await
788 .map(|l| l.highlight_text(&rope, 0..code.len()))
789 .ok()
790 } else {
791 None
792 }
793 } else {
794 None
795 };
796
797 Some(ParsedMarkdownCodeBlock {
798 source_range,
799 contents: code.into(),
800 language,
801 highlights,
802 })
803 }
804
805 async fn parse_html_block(&mut self) -> Vec<ParsedMarkdownElement> {
806 let mut elements = Vec::new();
807 let Some((_event, _source_range)) = self.previous() else {
808 return elements;
809 };
810
811 let mut html_source_range_start = None;
812 let mut html_source_range_end = None;
813 let mut html_buffer = String::new();
814
815 while !self.eof() {
816 let Some((current, source_range)) = self.current() else {
817 break;
818 };
819 let source_range = source_range.clone();
820 match current {
821 Event::Html(html) => {
822 html_source_range_start.get_or_insert(source_range.start);
823 html_source_range_end = Some(source_range.end);
824 html_buffer.push_str(html);
825 self.cursor += 1;
826 }
827 Event::End(TagEnd::CodeBlock) => {
828 self.cursor += 1;
829 break;
830 }
831 _ => {
832 break;
833 }
834 }
835 }
836
837 let bytes = cleanup_html(&html_buffer);
838
839 let mut cursor = std::io::Cursor::new(bytes);
840 if let Ok(dom) = parse_document(RcDom::default(), ParseOpts::default())
841 .from_utf8()
842 .read_from(&mut cursor)
843 && let Some((start, end)) = html_source_range_start.zip(html_source_range_end)
844 {
845 self.parse_html_node(
846 start..end,
847 &dom.document,
848 &mut elements,
849 &ParseHtmlNodeContext::default(),
850 );
851 }
852
853 elements
854 }
855
856 fn parse_html_node(
857 &self,
858 source_range: Range<usize>,
859 node: &Rc<markup5ever_rcdom::Node>,
860 elements: &mut Vec<ParsedMarkdownElement>,
861 context: &ParseHtmlNodeContext,
862 ) {
863 match &node.data {
864 markup5ever_rcdom::NodeData::Document => {
865 self.consume_children(source_range, node, elements, context);
866 }
867 markup5ever_rcdom::NodeData::Text { contents } => {
868 elements.push(ParsedMarkdownElement::Paragraph(vec![
869 MarkdownParagraphChunk::Text(ParsedMarkdownText {
870 source_range,
871 regions: Vec::default(),
872 region_ranges: Vec::default(),
873 highlights: Vec::default(),
874 contents: contents.borrow().to_string().into(),
875 }),
876 ]));
877 }
878 markup5ever_rcdom::NodeData::Comment { .. } => {}
879 markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
880 let mut styles = if let Some(styles) = Self::markdown_style_from_html_styles(
881 Self::extract_styles_from_attributes(attrs),
882 ) {
883 vec![MarkdownHighlight::Style(styles)]
884 } else {
885 Vec::default()
886 };
887
888 if local_name!("img") == name.local {
889 if let Some(image) = self.extract_image(source_range, attrs) {
890 elements.push(ParsedMarkdownElement::Image(image));
891 }
892 } else if local_name!("p") == name.local {
893 let mut paragraph = MarkdownParagraph::new();
894 self.parse_paragraph(source_range, node, &mut paragraph, &mut styles);
895
896 if !paragraph.is_empty() {
897 elements.push(ParsedMarkdownElement::Paragraph(paragraph));
898 }
899 } else if matches!(
900 name.local,
901 local_name!("h1")
902 | local_name!("h2")
903 | local_name!("h3")
904 | local_name!("h4")
905 | local_name!("h5")
906 | local_name!("h6")
907 ) {
908 let mut paragraph = MarkdownParagraph::new();
909 self.consume_paragraph(source_range.clone(), node, &mut paragraph, &mut styles);
910
911 if !paragraph.is_empty() {
912 elements.push(ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
913 source_range,
914 level: match name.local {
915 local_name!("h1") => HeadingLevel::H1,
916 local_name!("h2") => HeadingLevel::H2,
917 local_name!("h3") => HeadingLevel::H3,
918 local_name!("h4") => HeadingLevel::H4,
919 local_name!("h5") => HeadingLevel::H5,
920 local_name!("h6") => HeadingLevel::H6,
921 _ => unreachable!(),
922 },
923 contents: paragraph,
924 }));
925 }
926 } else if local_name!("ul") == name.local || local_name!("ol") == name.local {
927 if let Some(list_items) = self.extract_html_list(
928 node,
929 local_name!("ol") == name.local,
930 context.list_item_depth,
931 source_range,
932 ) {
933 elements.extend(list_items);
934 }
935 } else if local_name!("blockquote") == name.local {
936 if let Some(blockquote) = self.extract_html_blockquote(node, source_range) {
937 elements.push(ParsedMarkdownElement::BlockQuote(blockquote));
938 }
939 } else if local_name!("table") == name.local {
940 if let Some(table) = self.extract_html_table(node, source_range) {
941 elements.push(ParsedMarkdownElement::Table(table));
942 }
943 } else {
944 self.consume_children(source_range, node, elements, context);
945 }
946 }
947 _ => {}
948 }
949 }
950
951 fn parse_paragraph(
952 &self,
953 source_range: Range<usize>,
954 node: &Rc<markup5ever_rcdom::Node>,
955 paragraph: &mut MarkdownParagraph,
956 highlights: &mut Vec<MarkdownHighlight>,
957 ) {
958 fn add_highlight_range(
959 text: &String,
960 start: usize,
961 highlights: Vec<MarkdownHighlight>,
962 ) -> Vec<(Range<usize>, MarkdownHighlight)> {
963 highlights
964 .into_iter()
965 .map(|style| (start..text.len(), style))
966 .collect()
967 }
968
969 match &node.data {
970 markup5ever_rcdom::NodeData::Text { contents } => {
971 // append the text to the last chunk, so we can have a hacky version
972 // of inline text with highlighting
973 if let Some(text) = paragraph.iter_mut().last().and_then(|p| match p {
974 MarkdownParagraphChunk::Text(text) => Some(text),
975 _ => None,
976 }) {
977 let mut new_text = text.contents.to_string();
978 new_text.push_str(&contents.borrow());
979 let highlights = add_highlight_range(
980 &new_text,
981 text.contents.len(),
982 std::mem::take(highlights),
983 );
984
985 text.contents = SharedString::from(new_text);
986 text.highlights.extend(highlights);
987 } else {
988 let contents = contents.borrow().to_string();
989 paragraph.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
990 source_range,
991 highlights: add_highlight_range(&contents, 0, std::mem::take(highlights)),
992 regions: Vec::default(),
993 contents: contents.into(),
994 region_ranges: Vec::default(),
995 }));
996 }
997 }
998 markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
999 if local_name!("img") == name.local {
1000 if let Some(image) = self.extract_image(source_range, attrs) {
1001 paragraph.push(MarkdownParagraphChunk::Image(image));
1002 }
1003 } else if local_name!("b") == name.local || local_name!("strong") == name.local {
1004 highlights.push(MarkdownHighlight::Style(MarkdownHighlightStyle {
1005 weight: FontWeight::BOLD,
1006 ..Default::default()
1007 }));
1008
1009 self.consume_paragraph(source_range, node, paragraph, highlights);
1010 } else if local_name!("i") == name.local {
1011 highlights.push(MarkdownHighlight::Style(MarkdownHighlightStyle {
1012 italic: true,
1013 ..Default::default()
1014 }));
1015
1016 self.consume_paragraph(source_range, node, paragraph, highlights);
1017 } else if local_name!("em") == name.local {
1018 highlights.push(MarkdownHighlight::Style(MarkdownHighlightStyle {
1019 oblique: true,
1020 ..Default::default()
1021 }));
1022
1023 self.consume_paragraph(source_range, node, paragraph, highlights);
1024 } else if local_name!("del") == name.local {
1025 highlights.push(MarkdownHighlight::Style(MarkdownHighlightStyle {
1026 strikethrough: true,
1027 ..Default::default()
1028 }));
1029
1030 self.consume_paragraph(source_range, node, paragraph, highlights);
1031 } else if local_name!("ins") == name.local {
1032 highlights.push(MarkdownHighlight::Style(MarkdownHighlightStyle {
1033 underline: true,
1034 ..Default::default()
1035 }));
1036
1037 self.consume_paragraph(source_range, node, paragraph, highlights);
1038 } else {
1039 self.consume_paragraph(source_range, node, paragraph, highlights);
1040 }
1041 }
1042 _ => {}
1043 }
1044 }
1045
1046 fn consume_paragraph(
1047 &self,
1048 source_range: Range<usize>,
1049 node: &Rc<markup5ever_rcdom::Node>,
1050 paragraph: &mut MarkdownParagraph,
1051 highlights: &mut Vec<MarkdownHighlight>,
1052 ) {
1053 for node in node.children.borrow().iter() {
1054 self.parse_paragraph(source_range.clone(), node, paragraph, highlights);
1055 }
1056 }
1057
1058 fn parse_table_row(
1059 &self,
1060 source_range: Range<usize>,
1061 node: &Rc<markup5ever_rcdom::Node>,
1062 ) -> Option<ParsedMarkdownTableRow> {
1063 let mut columns = Vec::new();
1064
1065 match &node.data {
1066 markup5ever_rcdom::NodeData::Element { name, .. } => {
1067 if local_name!("tr") != name.local {
1068 return None;
1069 }
1070
1071 for node in node.children.borrow().iter() {
1072 if let Some(column) = self.parse_table_column(source_range.clone(), node) {
1073 columns.push(column);
1074 }
1075 }
1076 }
1077 _ => {}
1078 }
1079
1080 if columns.is_empty() {
1081 None
1082 } else {
1083 Some(ParsedMarkdownTableRow { columns })
1084 }
1085 }
1086
1087 fn parse_table_column(
1088 &self,
1089 source_range: Range<usize>,
1090 node: &Rc<markup5ever_rcdom::Node>,
1091 ) -> Option<ParsedMarkdownTableColumn> {
1092 match &node.data {
1093 markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
1094 if !matches!(name.local, local_name!("th") | local_name!("td")) {
1095 return None;
1096 }
1097
1098 let mut children = MarkdownParagraph::new();
1099 self.consume_paragraph(source_range, node, &mut children, &mut Vec::new());
1100
1101 let is_header = matches!(name.local, local_name!("th"));
1102
1103 Some(ParsedMarkdownTableColumn {
1104 col_span: std::cmp::max(
1105 Self::attr_value(attrs, local_name!("colspan"))
1106 .and_then(|span| span.parse().ok())
1107 .unwrap_or(1),
1108 1,
1109 ),
1110 row_span: std::cmp::max(
1111 Self::attr_value(attrs, local_name!("rowspan"))
1112 .and_then(|span| span.parse().ok())
1113 .unwrap_or(1),
1114 1,
1115 ),
1116 is_header,
1117 children,
1118 alignment: Self::attr_value(attrs, local_name!("align"))
1119 .and_then(|align| match align.as_str() {
1120 "left" => Some(ParsedMarkdownTableAlignment::Left),
1121 "center" => Some(ParsedMarkdownTableAlignment::Center),
1122 "right" => Some(ParsedMarkdownTableAlignment::Right),
1123 _ => None,
1124 })
1125 .unwrap_or_else(|| {
1126 if is_header {
1127 ParsedMarkdownTableAlignment::Center
1128 } else {
1129 ParsedMarkdownTableAlignment::default()
1130 }
1131 }),
1132 })
1133 }
1134 _ => None,
1135 }
1136 }
1137
1138 fn consume_children(
1139 &self,
1140 source_range: Range<usize>,
1141 node: &Rc<markup5ever_rcdom::Node>,
1142 elements: &mut Vec<ParsedMarkdownElement>,
1143 context: &ParseHtmlNodeContext,
1144 ) {
1145 for node in node.children.borrow().iter() {
1146 self.parse_html_node(source_range.clone(), node, elements, context);
1147 }
1148 }
1149
1150 fn attr_value(
1151 attrs: &RefCell<Vec<html5ever::Attribute>>,
1152 name: html5ever::LocalName,
1153 ) -> Option<String> {
1154 attrs.borrow().iter().find_map(|attr| {
1155 if attr.name.local == name {
1156 Some(attr.value.to_string())
1157 } else {
1158 None
1159 }
1160 })
1161 }
1162
1163 fn markdown_style_from_html_styles(
1164 styles: HashMap<String, String>,
1165 ) -> Option<MarkdownHighlightStyle> {
1166 let mut markdown_style = MarkdownHighlightStyle::default();
1167
1168 if let Some(text_decoration) = styles.get("text-decoration") {
1169 match text_decoration.to_lowercase().as_str() {
1170 "underline" => {
1171 markdown_style.underline = true;
1172 }
1173 "line-through" => {
1174 markdown_style.strikethrough = true;
1175 }
1176 _ => {}
1177 }
1178 }
1179
1180 if let Some(font_style) = styles.get("font-style") {
1181 match font_style.to_lowercase().as_str() {
1182 "italic" => {
1183 markdown_style.italic = true;
1184 }
1185 "oblique" => {
1186 markdown_style.oblique = true;
1187 }
1188 _ => {}
1189 }
1190 }
1191
1192 if let Some(font_weight) = styles.get("font-weight") {
1193 match font_weight.to_lowercase().as_str() {
1194 "bold" => {
1195 markdown_style.weight = FontWeight::BOLD;
1196 }
1197 "lighter" => {
1198 markdown_style.weight = FontWeight::THIN;
1199 }
1200 _ => {
1201 if let Some(weight) = font_weight.parse::<f32>().ok() {
1202 markdown_style.weight = FontWeight(weight);
1203 }
1204 }
1205 }
1206 }
1207
1208 if markdown_style != MarkdownHighlightStyle::default() {
1209 Some(markdown_style)
1210 } else {
1211 None
1212 }
1213 }
1214
1215 fn extract_styles_from_attributes(
1216 attrs: &RefCell<Vec<html5ever::Attribute>>,
1217 ) -> HashMap<String, String> {
1218 let mut styles = HashMap::new();
1219
1220 if let Some(style) = Self::attr_value(attrs, local_name!("style")) {
1221 for decl in style.split(';') {
1222 let mut parts = decl.splitn(2, ':');
1223 if let Some((key, value)) = parts.next().zip(parts.next()) {
1224 styles.insert(
1225 key.trim().to_lowercase().to_string(),
1226 value.trim().to_string(),
1227 );
1228 }
1229 }
1230 }
1231
1232 styles
1233 }
1234
1235 fn extract_image(
1236 &self,
1237 source_range: Range<usize>,
1238 attrs: &RefCell<Vec<html5ever::Attribute>>,
1239 ) -> Option<Image> {
1240 let src = Self::attr_value(attrs, local_name!("src"))?;
1241
1242 let mut image = Image::identify(src, source_range, self.file_location_directory.clone())?;
1243
1244 if let Some(alt) = Self::attr_value(attrs, local_name!("alt")) {
1245 image.set_alt_text(alt.into());
1246 }
1247
1248 let styles = Self::extract_styles_from_attributes(attrs);
1249
1250 if let Some(width) = Self::attr_value(attrs, local_name!("width"))
1251 .or_else(|| styles.get("width").cloned())
1252 .and_then(|width| Self::parse_html_element_dimension(&width))
1253 {
1254 image.set_width(width);
1255 }
1256
1257 if let Some(height) = Self::attr_value(attrs, local_name!("height"))
1258 .or_else(|| styles.get("height").cloned())
1259 .and_then(|height| Self::parse_html_element_dimension(&height))
1260 {
1261 image.set_height(height);
1262 }
1263
1264 Some(image)
1265 }
1266
1267 fn extract_html_list(
1268 &self,
1269 node: &Rc<markup5ever_rcdom::Node>,
1270 ordered: bool,
1271 depth: u16,
1272 source_range: Range<usize>,
1273 ) -> Option<Vec<ParsedMarkdownElement>> {
1274 let mut list_items = Vec::with_capacity(node.children.borrow().len());
1275
1276 for (index, node) in node.children.borrow().iter().enumerate() {
1277 match &node.data {
1278 markup5ever_rcdom::NodeData::Element { name, .. } => {
1279 if local_name!("li") != name.local {
1280 continue;
1281 }
1282
1283 let mut content = Vec::new();
1284 self.consume_children(
1285 source_range.clone(),
1286 node,
1287 &mut content,
1288 &ParseHtmlNodeContext {
1289 list_item_depth: depth + 1,
1290 },
1291 );
1292
1293 if !content.is_empty() {
1294 list_items.push(ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
1295 depth,
1296 source_range: source_range.clone(),
1297 item_type: if ordered {
1298 ParsedMarkdownListItemType::Ordered(index as u64 + 1)
1299 } else {
1300 ParsedMarkdownListItemType::Unordered
1301 },
1302 content,
1303 nested: true,
1304 }));
1305 }
1306 }
1307 _ => {}
1308 }
1309 }
1310
1311 if list_items.is_empty() {
1312 None
1313 } else {
1314 Some(list_items)
1315 }
1316 }
1317
1318 fn parse_html_element_dimension(value: &str) -> Option<DefiniteLength> {
1319 if value.ends_with("%") {
1320 value
1321 .trim_end_matches("%")
1322 .parse::<f32>()
1323 .ok()
1324 .map(|value| relative(value / 100.))
1325 } else {
1326 value
1327 .trim_end_matches("px")
1328 .parse()
1329 .ok()
1330 .map(|value| px(value).into())
1331 }
1332 }
1333
1334 fn extract_html_blockquote(
1335 &self,
1336 node: &Rc<markup5ever_rcdom::Node>,
1337 source_range: Range<usize>,
1338 ) -> Option<ParsedMarkdownBlockQuote> {
1339 let mut children = Vec::new();
1340 self.consume_children(
1341 source_range.clone(),
1342 node,
1343 &mut children,
1344 &ParseHtmlNodeContext::default(),
1345 );
1346
1347 if children.is_empty() {
1348 None
1349 } else {
1350 Some(ParsedMarkdownBlockQuote {
1351 children,
1352 source_range,
1353 })
1354 }
1355 }
1356
1357 fn extract_html_table(
1358 &self,
1359 node: &Rc<markup5ever_rcdom::Node>,
1360 source_range: Range<usize>,
1361 ) -> Option<ParsedMarkdownTable> {
1362 let mut header_rows = Vec::new();
1363 let mut body_rows = Vec::new();
1364 let mut caption = None;
1365
1366 // node should be a thead, tbody or caption element
1367 for node in node.children.borrow().iter() {
1368 match &node.data {
1369 markup5ever_rcdom::NodeData::Element { name, .. } => {
1370 if local_name!("caption") == name.local {
1371 let mut paragraph = MarkdownParagraph::new();
1372 self.parse_paragraph(
1373 source_range.clone(),
1374 node,
1375 &mut paragraph,
1376 &mut Vec::new(),
1377 );
1378 caption = Some(paragraph);
1379 }
1380 if local_name!("thead") == name.local {
1381 // node should be a tr element
1382 for node in node.children.borrow().iter() {
1383 if let Some(row) = self.parse_table_row(source_range.clone(), node) {
1384 header_rows.push(row);
1385 }
1386 }
1387 } else if local_name!("tbody") == name.local {
1388 // node should be a tr element
1389 for node in node.children.borrow().iter() {
1390 if let Some(row) = self.parse_table_row(source_range.clone(), node) {
1391 body_rows.push(row);
1392 }
1393 }
1394 }
1395 }
1396 _ => {}
1397 }
1398 }
1399
1400 if !header_rows.is_empty() || !body_rows.is_empty() {
1401 Some(ParsedMarkdownTable {
1402 source_range,
1403 body: body_rows,
1404 header: header_rows,
1405 caption,
1406 })
1407 } else {
1408 None
1409 }
1410 }
1411}
1412
1413#[cfg(test)]
1414mod tests {
1415 use super::*;
1416 use ParsedMarkdownListItemType::*;
1417 use core::panic;
1418 use gpui::{AbsoluteLength, BackgroundExecutor, DefiniteLength};
1419 use language::{
1420 HighlightId, Language, LanguageConfig, LanguageMatcher, LanguageRegistry, tree_sitter_rust,
1421 };
1422 use pretty_assertions::assert_eq;
1423
1424 async fn parse(input: &str) -> ParsedMarkdown {
1425 parse_markdown(input, None, None).await
1426 }
1427
1428 #[gpui::test]
1429 async fn test_headings() {
1430 let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
1431
1432 assert_eq!(
1433 parsed.children,
1434 vec![
1435 h1(text("Heading one", 2..13), 0..14),
1436 h2(text("Heading two", 17..28), 14..29),
1437 h3(text("Heading three", 33..46), 29..46),
1438 ]
1439 );
1440 }
1441
1442 #[gpui::test]
1443 async fn test_newlines_dont_new_paragraphs() {
1444 let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
1445
1446 assert_eq!(
1447 parsed.children,
1448 vec![p("Some text that is bolded and italicized", 0..46)]
1449 );
1450 }
1451
1452 #[gpui::test]
1453 async fn test_heading_with_paragraph() {
1454 let parsed = parse("# Zed\nThe editor").await;
1455
1456 assert_eq!(
1457 parsed.children,
1458 vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
1459 );
1460 }
1461
1462 #[gpui::test]
1463 async fn test_double_newlines_do_new_paragraphs() {
1464 let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
1465
1466 assert_eq!(
1467 parsed.children,
1468 vec![
1469 p("Some text that is bolded", 0..29),
1470 p("and italicized", 31..47),
1471 ]
1472 );
1473 }
1474
1475 #[gpui::test]
1476 async fn test_bold_italic_text() {
1477 let parsed = parse("Some text **that is bolded** and *italicized*").await;
1478
1479 assert_eq!(
1480 parsed.children,
1481 vec![p("Some text that is bolded and italicized", 0..45)]
1482 );
1483 }
1484
1485 #[gpui::test]
1486 async fn test_nested_bold_strikethrough_text() {
1487 let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
1488
1489 assert_eq!(parsed.children.len(), 1);
1490 assert_eq!(
1491 parsed.children[0],
1492 ParsedMarkdownElement::Paragraph(vec![MarkdownParagraphChunk::Text(
1493 ParsedMarkdownText {
1494 source_range: 0..35,
1495 contents: "Some bostrikethroughld text".into(),
1496 highlights: Vec::new(),
1497 region_ranges: Vec::new(),
1498 regions: Vec::new(),
1499 }
1500 )])
1501 );
1502
1503 let new_text = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1504 text
1505 } else {
1506 panic!("Expected a paragraph");
1507 };
1508
1509 let paragraph = if let MarkdownParagraphChunk::Text(text) = &new_text[0] {
1510 text
1511 } else {
1512 panic!("Expected a text");
1513 };
1514
1515 assert_eq!(
1516 paragraph.highlights,
1517 vec![
1518 (
1519 5..7,
1520 MarkdownHighlight::Style(MarkdownHighlightStyle {
1521 weight: FontWeight::BOLD,
1522 ..Default::default()
1523 }),
1524 ),
1525 (
1526 7..20,
1527 MarkdownHighlight::Style(MarkdownHighlightStyle {
1528 weight: FontWeight::BOLD,
1529 strikethrough: true,
1530 ..Default::default()
1531 }),
1532 ),
1533 (
1534 20..22,
1535 MarkdownHighlight::Style(MarkdownHighlightStyle {
1536 weight: FontWeight::BOLD,
1537 ..Default::default()
1538 }),
1539 ),
1540 ]
1541 );
1542 }
1543
1544 #[gpui::test]
1545 async fn test_html_inline_style_elements() {
1546 let parsed =
1547 parse("<p>Some text <strong>strong text</strong> more text <b>bold text</b> more text <i>italic text</i> more text <em>emphasized text</em> more text <del>deleted text</del> more text <ins>inserted text</ins></p>").await;
1548
1549 assert_eq!(1, parsed.children.len());
1550 let chunks = if let ParsedMarkdownElement::Paragraph(chunks) = &parsed.children[0] {
1551 chunks
1552 } else {
1553 panic!("Expected a paragraph");
1554 };
1555
1556 assert_eq!(1, chunks.len());
1557 let text = if let MarkdownParagraphChunk::Text(text) = &chunks[0] {
1558 text
1559 } else {
1560 panic!("Expected a paragraph");
1561 };
1562
1563 assert_eq!(0..205, text.source_range);
1564 assert_eq!(
1565 "Some text strong text more text bold text more text italic text more text emphasized text more text deleted text more text inserted text",
1566 text.contents.as_str(),
1567 );
1568 assert_eq!(
1569 vec![
1570 (
1571 10..21,
1572 MarkdownHighlight::Style(MarkdownHighlightStyle {
1573 weight: FontWeight(700.0),
1574 ..Default::default()
1575 },),
1576 ),
1577 (
1578 32..41,
1579 MarkdownHighlight::Style(MarkdownHighlightStyle {
1580 weight: FontWeight(700.0),
1581 ..Default::default()
1582 },),
1583 ),
1584 (
1585 52..63,
1586 MarkdownHighlight::Style(MarkdownHighlightStyle {
1587 italic: true,
1588 weight: FontWeight(400.0),
1589 ..Default::default()
1590 },),
1591 ),
1592 (
1593 74..89,
1594 MarkdownHighlight::Style(MarkdownHighlightStyle {
1595 weight: FontWeight(400.0),
1596 oblique: true,
1597 ..Default::default()
1598 },),
1599 ),
1600 (
1601 100..112,
1602 MarkdownHighlight::Style(MarkdownHighlightStyle {
1603 strikethrough: true,
1604 weight: FontWeight(400.0),
1605 ..Default::default()
1606 },),
1607 ),
1608 (
1609 123..136,
1610 MarkdownHighlight::Style(MarkdownHighlightStyle {
1611 underline: true,
1612 weight: FontWeight(400.0,),
1613 ..Default::default()
1614 },),
1615 ),
1616 ],
1617 text.highlights
1618 );
1619 }
1620
1621 #[gpui::test]
1622 async fn test_text_with_inline_html() {
1623 let parsed = parse("This is a paragraph with an inline HTML <sometag>tag</sometag>.").await;
1624
1625 assert_eq!(
1626 parsed.children,
1627 vec![p("This is a paragraph with an inline HTML tag.", 0..63),],
1628 );
1629 }
1630
1631 #[gpui::test]
1632 async fn test_raw_links_detection() {
1633 let parsed = parse("Checkout this https://zed.dev link").await;
1634
1635 assert_eq!(
1636 parsed.children,
1637 vec![p("Checkout this https://zed.dev link", 0..34)]
1638 );
1639 }
1640
1641 #[gpui::test]
1642 async fn test_empty_image() {
1643 let parsed = parse("![]()").await;
1644
1645 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1646 text
1647 } else {
1648 panic!("Expected a paragraph");
1649 };
1650 assert_eq!(paragraph.len(), 0);
1651 }
1652
1653 #[gpui::test]
1654 async fn test_image_links_detection() {
1655 let parsed = parse("").await;
1656
1657 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1658 text
1659 } else {
1660 panic!("Expected a paragraph");
1661 };
1662 assert_eq!(
1663 paragraph[0],
1664 MarkdownParagraphChunk::Image(Image {
1665 source_range: 0..111,
1666 link: Link::Web {
1667 url: "https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png".to_string(),
1668 },
1669 alt_text: Some("test".into()),
1670 height: None,
1671 width: None,
1672 },)
1673 );
1674 }
1675
1676 #[gpui::test]
1677 async fn test_image_alt_text() {
1678 let parsed = parse("[](https://zed.dev)\n ").await;
1679
1680 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1681 text
1682 } else {
1683 panic!("Expected a paragraph");
1684 };
1685 assert_eq!(
1686 paragraph[0],
1687 MarkdownParagraphChunk::Image(Image {
1688 source_range: 0..142,
1689 link: Link::Web {
1690 url: "https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/zed-industries/zed/main/assets/badge/v0.json".to_string(),
1691 },
1692 alt_text: Some("Zed".into()),
1693 height: None,
1694 width: None,
1695 },)
1696 );
1697 }
1698
1699 #[gpui::test]
1700 async fn test_image_without_alt_text() {
1701 let parsed = parse("").await;
1702
1703 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1704 text
1705 } else {
1706 panic!("Expected a paragraph");
1707 };
1708 assert_eq!(
1709 paragraph[0],
1710 MarkdownParagraphChunk::Image(Image {
1711 source_range: 0..31,
1712 link: Link::Web {
1713 url: "http://example.com/foo.png".to_string(),
1714 },
1715 alt_text: None,
1716 height: None,
1717 width: None,
1718 },)
1719 );
1720 }
1721
1722 #[gpui::test]
1723 async fn test_image_with_alt_text_containing_formatting() {
1724 let parsed = parse("").await;
1725
1726 let ParsedMarkdownElement::Paragraph(chunks) = &parsed.children[0] else {
1727 panic!("Expected a paragraph");
1728 };
1729 assert_eq!(
1730 chunks,
1731 &[MarkdownParagraphChunk::Image(Image {
1732 source_range: 0..44,
1733 link: Link::Web {
1734 url: "http://example.com/foo.png".to_string(),
1735 },
1736 alt_text: Some("foo bar baz".into()),
1737 height: None,
1738 width: None,
1739 }),],
1740 );
1741 }
1742
1743 #[gpui::test]
1744 async fn test_images_with_text_in_between() {
1745 let parsed = parse(
1746 "\nLorem Ipsum\n",
1747 )
1748 .await;
1749
1750 let chunks = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1751 text
1752 } else {
1753 panic!("Expected a paragraph");
1754 };
1755 assert_eq!(
1756 chunks,
1757 &vec![
1758 MarkdownParagraphChunk::Image(Image {
1759 source_range: 0..81,
1760 link: Link::Web {
1761 url: "http://example.com/foo.png".to_string(),
1762 },
1763 alt_text: Some("foo".into()),
1764 height: None,
1765 width: None,
1766 }),
1767 MarkdownParagraphChunk::Text(ParsedMarkdownText {
1768 source_range: 0..81,
1769 contents: " Lorem Ipsum ".into(),
1770 highlights: Vec::new(),
1771 region_ranges: Vec::new(),
1772 regions: Vec::new(),
1773 }),
1774 MarkdownParagraphChunk::Image(Image {
1775 source_range: 0..81,
1776 link: Link::Web {
1777 url: "http://example.com/bar.png".to_string(),
1778 },
1779 alt_text: Some("bar".into()),
1780 height: None,
1781 width: None,
1782 })
1783 ]
1784 );
1785 }
1786
/// Table-driven check of `MarkdownParser::parse_html_element_dimension`
/// for percentages, pixel values, unit-less values, decimals and invalid
/// input.
#[test]
fn test_parse_html_element_dimension() {
    let fraction = |value: f32| Some(DefiniteLength::Fraction(value));
    let pixels = |value: f32| Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(value))));

    let cases = [
        // Percentage values
        ("50%", fraction(0.5)),
        ("100%", fraction(1.0)),
        ("25%", fraction(0.25)),
        ("0%", fraction(0.0)),
        // Pixel values
        ("100px", pixels(100.0)),
        ("50px", pixels(50.0)),
        ("0px", pixels(0.0)),
        // Values without units are treated as pixels
        ("100", pixels(100.0)),
        ("42", pixels(42.0)),
        // Invalid values
        ("invalid", None),
        ("px", None),
        ("%", None),
        ("", None),
        ("abc%", None),
        ("abcpx", None),
        // Decimal values
        ("50.5%", fraction(0.505)),
        ("100.25px", pixels(100.25)),
        ("42.0", pixels(42.0)),
    ];

    for (input, expected) in cases {
        assert_eq!(
            MarkdownParser::parse_html_element_dimension(input),
            expected,
            "input: {input:?}"
        );
    }
}
1856
1857 #[gpui::test]
1858 async fn test_html_unordered_list() {
1859 let parsed = parse(
1860 "<ul>
1861 <li>Item 1</li>
1862 <li>Item 2</li>
1863 </ul>",
1864 )
1865 .await;
1866
1867 assert_eq!(
1868 ParsedMarkdown {
1869 children: vec![
1870 nested_list_item(
1871 0..82,
1872 1,
1873 ParsedMarkdownListItemType::Unordered,
1874 vec![ParsedMarkdownElement::Paragraph(text("Item 1", 0..82))]
1875 ),
1876 nested_list_item(
1877 0..82,
1878 1,
1879 ParsedMarkdownListItemType::Unordered,
1880 vec![ParsedMarkdownElement::Paragraph(text("Item 2", 0..82))]
1881 ),
1882 ]
1883 },
1884 parsed
1885 );
1886 }
1887
1888 #[gpui::test]
1889 async fn test_html_ordered_list() {
1890 let parsed = parse(
1891 "<ol>
1892 <li>Item 1</li>
1893 <li>Item 2</li>
1894 </ol>",
1895 )
1896 .await;
1897
1898 assert_eq!(
1899 ParsedMarkdown {
1900 children: vec![
1901 nested_list_item(
1902 0..82,
1903 1,
1904 ParsedMarkdownListItemType::Ordered(1),
1905 vec![ParsedMarkdownElement::Paragraph(text("Item 1", 0..82))]
1906 ),
1907 nested_list_item(
1908 0..82,
1909 1,
1910 ParsedMarkdownListItemType::Ordered(2),
1911 vec![ParsedMarkdownElement::Paragraph(text("Item 2", 0..82))]
1912 ),
1913 ]
1914 },
1915 parsed
1916 );
1917 }
1918
1919 #[gpui::test]
1920 async fn test_html_nested_ordered_list() {
1921 let parsed = parse(
1922 "<ol>
1923 <li>Item 1</li>
1924 <li>Item 2
1925 <ol>
1926 <li>Sub-Item 1</li>
1927 <li>Sub-Item 2</li>
1928 </ol>
1929 </li>
1930 </ol>",
1931 )
1932 .await;
1933
1934 assert_eq!(
1935 ParsedMarkdown {
1936 children: vec![
1937 nested_list_item(
1938 0..216,
1939 1,
1940 ParsedMarkdownListItemType::Ordered(1),
1941 vec![ParsedMarkdownElement::Paragraph(text("Item 1", 0..216))]
1942 ),
1943 nested_list_item(
1944 0..216,
1945 1,
1946 ParsedMarkdownListItemType::Ordered(2),
1947 vec![
1948 ParsedMarkdownElement::Paragraph(text("Item 2", 0..216)),
1949 nested_list_item(
1950 0..216,
1951 2,
1952 ParsedMarkdownListItemType::Ordered(1),
1953 vec![ParsedMarkdownElement::Paragraph(text("Sub-Item 1", 0..216))]
1954 ),
1955 nested_list_item(
1956 0..216,
1957 2,
1958 ParsedMarkdownListItemType::Ordered(2),
1959 vec![ParsedMarkdownElement::Paragraph(text("Sub-Item 2", 0..216))]
1960 ),
1961 ]
1962 ),
1963 ]
1964 },
1965 parsed
1966 );
1967 }
1968
1969 #[gpui::test]
1970 async fn test_html_nested_unordered_list() {
1971 let parsed = parse(
1972 "<ul>
1973 <li>Item 1</li>
1974 <li>Item 2
1975 <ul>
1976 <li>Sub-Item 1</li>
1977 <li>Sub-Item 2</li>
1978 </ul>
1979 </li>
1980 </ul>",
1981 )
1982 .await;
1983
1984 assert_eq!(
1985 ParsedMarkdown {
1986 children: vec![
1987 nested_list_item(
1988 0..216,
1989 1,
1990 ParsedMarkdownListItemType::Unordered,
1991 vec![ParsedMarkdownElement::Paragraph(text("Item 1", 0..216))]
1992 ),
1993 nested_list_item(
1994 0..216,
1995 1,
1996 ParsedMarkdownListItemType::Unordered,
1997 vec![
1998 ParsedMarkdownElement::Paragraph(text("Item 2", 0..216)),
1999 nested_list_item(
2000 0..216,
2001 2,
2002 ParsedMarkdownListItemType::Unordered,
2003 vec![ParsedMarkdownElement::Paragraph(text("Sub-Item 1", 0..216))]
2004 ),
2005 nested_list_item(
2006 0..216,
2007 2,
2008 ParsedMarkdownListItemType::Unordered,
2009 vec![ParsedMarkdownElement::Paragraph(text("Sub-Item 2", 0..216))]
2010 ),
2011 ]
2012 ),
2013 ]
2014 },
2015 parsed
2016 );
2017 }
2018
2019 #[gpui::test]
2020 async fn test_inline_html_image_tag() {
2021 let parsed =
2022 parse("<p>Some text<img src=\"http://example.com/foo.png\" /> some more text</p>")
2023 .await;
2024
2025 assert_eq!(
2026 ParsedMarkdown {
2027 children: vec![ParsedMarkdownElement::Paragraph(vec![
2028 MarkdownParagraphChunk::Text(ParsedMarkdownText {
2029 source_range: 0..71,
2030 contents: "Some text".into(),
2031 highlights: Default::default(),
2032 region_ranges: Default::default(),
2033 regions: Default::default()
2034 }),
2035 MarkdownParagraphChunk::Image(Image {
2036 source_range: 0..71,
2037 link: Link::Web {
2038 url: "http://example.com/foo.png".to_string(),
2039 },
2040 alt_text: None,
2041 height: None,
2042 width: None,
2043 }),
2044 MarkdownParagraphChunk::Text(ParsedMarkdownText {
2045 source_range: 0..71,
2046 contents: " some more text".into(),
2047 highlights: Default::default(),
2048 region_ranges: Default::default(),
2049 regions: Default::default()
2050 }),
2051 ])]
2052 },
2053 parsed
2054 );
2055 }
2056
2057 #[gpui::test]
2058 async fn test_html_block_quote() {
2059 let parsed = parse(
2060 "<blockquote>
2061 <p>some description</p>
2062 </blockquote>",
2063 )
2064 .await;
2065
2066 assert_eq!(
2067 ParsedMarkdown {
2068 children: vec![block_quote(
2069 vec![ParsedMarkdownElement::Paragraph(text(
2070 "some description",
2071 0..78
2072 ))],
2073 0..78,
2074 )]
2075 },
2076 parsed
2077 );
2078 }
2079
2080 #[gpui::test]
2081 async fn test_html_nested_block_quote() {
2082 let parsed = parse(
2083 "<blockquote>
2084 <p>some description</p>
2085 <blockquote>
2086 <p>second description</p>
2087 </blockquote>
2088 </blockquote>",
2089 )
2090 .await;
2091
2092 assert_eq!(
2093 ParsedMarkdown {
2094 children: vec![block_quote(
2095 vec![
2096 ParsedMarkdownElement::Paragraph(text("some description", 0..179)),
2097 block_quote(
2098 vec![ParsedMarkdownElement::Paragraph(text(
2099 "second description",
2100 0..179
2101 ))],
2102 0..179,
2103 )
2104 ],
2105 0..179,
2106 )]
2107 },
2108 parsed
2109 );
2110 }
2111
2112 #[gpui::test]
2113 async fn test_html_table() {
2114 let parsed = parse(
2115 "<table>
2116 <thead>
2117 <tr>
2118 <th>Id</th>
2119 <th>Name</th>
2120 </tr>
2121 </thead>
2122 <tbody>
2123 <tr>
2124 <td>1</td>
2125 <td>Chris</td>
2126 </tr>
2127 <tr>
2128 <td>2</td>
2129 <td>Dennis</td>
2130 </tr>
2131 </tbody>
2132 </table>",
2133 )
2134 .await;
2135
2136 assert_eq!(
2137 ParsedMarkdown {
2138 children: vec![ParsedMarkdownElement::Table(table(
2139 0..366,
2140 None,
2141 vec![row(vec![
2142 column(
2143 1,
2144 1,
2145 true,
2146 text("Id", 0..366),
2147 ParsedMarkdownTableAlignment::Center
2148 ),
2149 column(
2150 1,
2151 1,
2152 true,
2153 text("Name ", 0..366),
2154 ParsedMarkdownTableAlignment::Center
2155 )
2156 ])],
2157 vec![
2158 row(vec![
2159 column(
2160 1,
2161 1,
2162 false,
2163 text("1", 0..366),
2164 ParsedMarkdownTableAlignment::None
2165 ),
2166 column(
2167 1,
2168 1,
2169 false,
2170 text("Chris", 0..366),
2171 ParsedMarkdownTableAlignment::None
2172 )
2173 ]),
2174 row(vec![
2175 column(
2176 1,
2177 1,
2178 false,
2179 text("2", 0..366),
2180 ParsedMarkdownTableAlignment::None
2181 ),
2182 column(
2183 1,
2184 1,
2185 false,
2186 text("Dennis", 0..366),
2187 ParsedMarkdownTableAlignment::None
2188 )
2189 ]),
2190 ],
2191 ))],
2192 },
2193 parsed
2194 );
2195 }
2196
2197 #[gpui::test]
2198 async fn test_html_table_with_caption() {
2199 let parsed = parse(
2200 "<table>
2201 <caption>My Table</caption>
2202 <tbody>
2203 <tr>
2204 <td>1</td>
2205 <td>Chris</td>
2206 </tr>
2207 <tr>
2208 <td>2</td>
2209 <td>Dennis</td>
2210 </tr>
2211 </tbody>
2212 </table>",
2213 )
2214 .await;
2215
2216 assert_eq!(
2217 ParsedMarkdown {
2218 children: vec![ParsedMarkdownElement::Table(table(
2219 0..280,
2220 Some(vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2221 source_range: 0..280,
2222 contents: "My Table".into(),
2223 highlights: Default::default(),
2224 region_ranges: Default::default(),
2225 regions: Default::default()
2226 })]),
2227 vec![],
2228 vec![
2229 row(vec![
2230 column(
2231 1,
2232 1,
2233 false,
2234 text("1", 0..280),
2235 ParsedMarkdownTableAlignment::None
2236 ),
2237 column(
2238 1,
2239 1,
2240 false,
2241 text("Chris", 0..280),
2242 ParsedMarkdownTableAlignment::None
2243 )
2244 ]),
2245 row(vec![
2246 column(
2247 1,
2248 1,
2249 false,
2250 text("2", 0..280),
2251 ParsedMarkdownTableAlignment::None
2252 ),
2253 column(
2254 1,
2255 1,
2256 false,
2257 text("Dennis", 0..280),
2258 ParsedMarkdownTableAlignment::None
2259 )
2260 ]),
2261 ],
2262 ))],
2263 },
2264 parsed
2265 );
2266 }
2267
2268 #[gpui::test]
2269 async fn test_html_table_without_headings() {
2270 let parsed = parse(
2271 "<table>
2272 <tbody>
2273 <tr>
2274 <td>1</td>
2275 <td>Chris</td>
2276 </tr>
2277 <tr>
2278 <td>2</td>
2279 <td>Dennis</td>
2280 </tr>
2281 </tbody>
2282 </table>",
2283 )
2284 .await;
2285
2286 assert_eq!(
2287 ParsedMarkdown {
2288 children: vec![ParsedMarkdownElement::Table(table(
2289 0..240,
2290 None,
2291 vec![],
2292 vec![
2293 row(vec![
2294 column(
2295 1,
2296 1,
2297 false,
2298 text("1", 0..240),
2299 ParsedMarkdownTableAlignment::None
2300 ),
2301 column(
2302 1,
2303 1,
2304 false,
2305 text("Chris", 0..240),
2306 ParsedMarkdownTableAlignment::None
2307 )
2308 ]),
2309 row(vec![
2310 column(
2311 1,
2312 1,
2313 false,
2314 text("2", 0..240),
2315 ParsedMarkdownTableAlignment::None
2316 ),
2317 column(
2318 1,
2319 1,
2320 false,
2321 text("Dennis", 0..240),
2322 ParsedMarkdownTableAlignment::None
2323 )
2324 ]),
2325 ],
2326 ))],
2327 },
2328 parsed
2329 );
2330 }
2331
2332 #[gpui::test]
2333 async fn test_html_table_without_body() {
2334 let parsed = parse(
2335 "<table>
2336 <thead>
2337 <tr>
2338 <th>Id</th>
2339 <th>Name</th>
2340 </tr>
2341 </thead>
2342 </table>",
2343 )
2344 .await;
2345
2346 assert_eq!(
2347 ParsedMarkdown {
2348 children: vec![ParsedMarkdownElement::Table(table(
2349 0..150,
2350 None,
2351 vec![row(vec![
2352 column(
2353 1,
2354 1,
2355 true,
2356 text("Id", 0..150),
2357 ParsedMarkdownTableAlignment::Center
2358 ),
2359 column(
2360 1,
2361 1,
2362 true,
2363 text("Name", 0..150),
2364 ParsedMarkdownTableAlignment::Center
2365 )
2366 ])],
2367 vec![],
2368 ))],
2369 },
2370 parsed
2371 );
2372 }
2373
2374 #[gpui::test]
2375 async fn test_html_heading_tags() {
2376 let parsed = parse("<h1>Heading</h1><h2>Heading</h2><h3>Heading</h3><h4>Heading</h4><h5>Heading</h5><h6>Heading</h6>").await;
2377
2378 assert_eq!(
2379 ParsedMarkdown {
2380 children: vec![
2381 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2382 level: HeadingLevel::H1,
2383 source_range: 0..96,
2384 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2385 source_range: 0..96,
2386 contents: "Heading".into(),
2387 highlights: Vec::default(),
2388 region_ranges: Vec::default(),
2389 regions: Vec::default()
2390 })],
2391 }),
2392 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2393 level: HeadingLevel::H2,
2394 source_range: 0..96,
2395 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2396 source_range: 0..96,
2397 contents: "Heading".into(),
2398 highlights: Vec::default(),
2399 region_ranges: Vec::default(),
2400 regions: Vec::default()
2401 })],
2402 }),
2403 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2404 level: HeadingLevel::H3,
2405 source_range: 0..96,
2406 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2407 source_range: 0..96,
2408 contents: "Heading".into(),
2409 highlights: Vec::default(),
2410 region_ranges: Vec::default(),
2411 regions: Vec::default()
2412 })],
2413 }),
2414 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2415 level: HeadingLevel::H4,
2416 source_range: 0..96,
2417 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2418 source_range: 0..96,
2419 contents: "Heading".into(),
2420 highlights: Vec::default(),
2421 region_ranges: Vec::default(),
2422 regions: Vec::default()
2423 })],
2424 }),
2425 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2426 level: HeadingLevel::H5,
2427 source_range: 0..96,
2428 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2429 source_range: 0..96,
2430 contents: "Heading".into(),
2431 highlights: Vec::default(),
2432 region_ranges: Vec::default(),
2433 regions: Vec::default()
2434 })],
2435 }),
2436 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2437 level: HeadingLevel::H6,
2438 source_range: 0..96,
2439 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2440 source_range: 0..96,
2441 contents: "Heading".into(),
2442 highlights: Vec::default(),
2443 region_ranges: Vec::default(),
2444 regions: Vec::default()
2445 })],
2446 }),
2447 ],
2448 },
2449 parsed
2450 );
2451 }
2452
2453 #[gpui::test]
2454 async fn test_html_image_tag() {
2455 let parsed = parse("<img src=\"http://example.com/foo.png\" />").await;
2456
2457 assert_eq!(
2458 ParsedMarkdown {
2459 children: vec![ParsedMarkdownElement::Image(Image {
2460 source_range: 0..40,
2461 link: Link::Web {
2462 url: "http://example.com/foo.png".to_string(),
2463 },
2464 alt_text: None,
2465 height: None,
2466 width: None,
2467 })]
2468 },
2469 parsed
2470 );
2471 }
2472
2473 #[gpui::test]
2474 async fn test_html_image_tag_with_alt_text() {
2475 let parsed = parse("<img src=\"http://example.com/foo.png\" alt=\"Foo\" />").await;
2476
2477 assert_eq!(
2478 ParsedMarkdown {
2479 children: vec![ParsedMarkdownElement::Image(Image {
2480 source_range: 0..50,
2481 link: Link::Web {
2482 url: "http://example.com/foo.png".to_string(),
2483 },
2484 alt_text: Some("Foo".into()),
2485 height: None,
2486 width: None,
2487 })]
2488 },
2489 parsed
2490 );
2491 }
2492
2493 #[gpui::test]
2494 async fn test_html_image_tag_with_height_and_width() {
2495 let parsed =
2496 parse("<img src=\"http://example.com/foo.png\" height=\"100\" width=\"200\" />").await;
2497
2498 assert_eq!(
2499 ParsedMarkdown {
2500 children: vec![ParsedMarkdownElement::Image(Image {
2501 source_range: 0..65,
2502 link: Link::Web {
2503 url: "http://example.com/foo.png".to_string(),
2504 },
2505 alt_text: None,
2506 height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
2507 width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
2508 })]
2509 },
2510 parsed
2511 );
2512 }
2513
2514 #[gpui::test]
2515 async fn test_html_image_style_tag_with_height_and_width() {
2516 let parsed = parse(
2517 "<img src=\"http://example.com/foo.png\" style=\"height:100px; width:200px;\" />",
2518 )
2519 .await;
2520
2521 assert_eq!(
2522 ParsedMarkdown {
2523 children: vec![ParsedMarkdownElement::Image(Image {
2524 source_range: 0..75,
2525 link: Link::Web {
2526 url: "http://example.com/foo.png".to_string(),
2527 },
2528 alt_text: None,
2529 height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
2530 width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
2531 })]
2532 },
2533 parsed
2534 );
2535 }
2536
2537 #[gpui::test]
2538 async fn test_header_only_table() {
2539 let markdown = "\
2540| Header 1 | Header 2 |
2541|----------|----------|
2542
2543Some other content
2544";
2545
2546 let expected_table = table(
2547 0..48,
2548 None,
2549 vec![row(vec![
2550 column(
2551 1,
2552 1,
2553 true,
2554 text("Header 1", 1..11),
2555 ParsedMarkdownTableAlignment::None,
2556 ),
2557 column(
2558 1,
2559 1,
2560 true,
2561 text("Header 2", 12..22),
2562 ParsedMarkdownTableAlignment::None,
2563 ),
2564 ])],
2565 vec![],
2566 );
2567
2568 assert_eq!(
2569 parse(markdown).await.children[0],
2570 ParsedMarkdownElement::Table(expected_table)
2571 );
2572 }
2573
2574 #[gpui::test]
2575 async fn test_basic_table() {
2576 let markdown = "\
2577| Header 1 | Header 2 |
2578|----------|----------|
2579| Cell 1 | Cell 2 |
2580| Cell 3 | Cell 4 |";
2581
2582 let expected_table = table(
2583 0..95,
2584 None,
2585 vec![row(vec![
2586 column(
2587 1,
2588 1,
2589 true,
2590 text("Header 1", 1..11),
2591 ParsedMarkdownTableAlignment::None,
2592 ),
2593 column(
2594 1,
2595 1,
2596 true,
2597 text("Header 2", 12..22),
2598 ParsedMarkdownTableAlignment::None,
2599 ),
2600 ])],
2601 vec![
2602 row(vec![
2603 column(
2604 1,
2605 1,
2606 false,
2607 text("Cell 1", 49..59),
2608 ParsedMarkdownTableAlignment::None,
2609 ),
2610 column(
2611 1,
2612 1,
2613 false,
2614 text("Cell 2", 60..70),
2615 ParsedMarkdownTableAlignment::None,
2616 ),
2617 ]),
2618 row(vec![
2619 column(
2620 1,
2621 1,
2622 false,
2623 text("Cell 3", 73..83),
2624 ParsedMarkdownTableAlignment::None,
2625 ),
2626 column(
2627 1,
2628 1,
2629 false,
2630 text("Cell 4", 84..94),
2631 ParsedMarkdownTableAlignment::None,
2632 ),
2633 ]),
2634 ],
2635 );
2636
2637 assert_eq!(
2638 parse(markdown).await.children[0],
2639 ParsedMarkdownElement::Table(expected_table)
2640 );
2641 }
2642
2643 #[gpui::test]
2644 async fn test_list_basic() {
2645 let parsed = parse(
2646 "\
2647* Item 1
2648* Item 2
2649* Item 3
2650",
2651 )
2652 .await;
2653
2654 assert_eq!(
2655 parsed.children,
2656 vec![
2657 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
2658 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
2659 list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
2660 ],
2661 );
2662 }
2663
2664 #[gpui::test]
2665 async fn test_list_with_tasks() {
2666 let parsed = parse(
2667 "\
2668- [ ] TODO
2669- [x] Checked
2670",
2671 )
2672 .await;
2673
2674 assert_eq!(
2675 parsed.children,
2676 vec![
2677 list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
2678 list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
2679 ],
2680 );
2681 }
2682
2683 #[gpui::test]
2684 async fn test_list_with_indented_task() {
2685 let parsed = parse(
2686 "\
2687- [ ] TODO
2688 - [x] Checked
2689 - Unordered
2690 1. Number 1
2691 1. Number 2
26921. Number A
2693",
2694 )
2695 .await;
2696
2697 assert_eq!(
2698 parsed.children,
2699 vec![
2700 list_item(0..12, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
2701 list_item(13..26, 2, Task(true, 15..18), vec![p("Checked", 19..26)]),
2702 list_item(29..40, 2, Unordered, vec![p("Unordered", 31..40)]),
2703 list_item(43..54, 2, Ordered(1), vec![p("Number 1", 46..54)]),
2704 list_item(57..68, 2, Ordered(2), vec![p("Number 2", 60..68)]),
2705 list_item(69..80, 1, Ordered(1), vec![p("Number A", 72..80)]),
2706 ],
2707 );
2708 }
2709
2710 #[gpui::test]
2711 async fn test_list_with_linebreak_is_handled_correctly() {
2712 let parsed = parse(
2713 "\
2714- [ ] Task 1
2715
2716- [x] Task 2
2717",
2718 )
2719 .await;
2720
2721 assert_eq!(
2722 parsed.children,
2723 vec![
2724 list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
2725 list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
2726 ],
2727 );
2728 }
2729
2730 #[gpui::test]
2731 async fn test_list_nested() {
2732 let parsed = parse(
2733 "\
2734* Item 1
2735* Item 2
2736* Item 3
2737
27381. Hello
27391. Two
2740 1. Three
27412. Four
27423. Five
2743
2744* First
2745 1. Hello
2746 1. Goodbyte
2747 - Inner
2748 - Inner
2749 2. Goodbyte
2750 - Next item empty
2751 -
2752* Last
2753",
2754 )
2755 .await;
2756
2757 assert_eq!(
2758 parsed.children,
2759 vec![
2760 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
2761 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
2762 list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
2763 list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
2764 list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
2765 list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
2766 list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
2767 list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
2768 list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
2769 list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
2770 list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
2771 list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
2772 list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
2773 list_item(143..159, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
2774 list_item(160..180, 3, Unordered, vec![p("Next item empty", 165..180)]),
2775 list_item(186..190, 3, Unordered, vec![]),
2776 list_item(191..197, 1, Unordered, vec![p("Last", 193..197)]),
2777 ]
2778 );
2779 }
2780
2781 #[gpui::test]
2782 async fn test_list_with_nested_content() {
2783 let parsed = parse(
2784 "\
2785* This is a list item with two paragraphs.
2786
2787 This is the second paragraph in the list item.
2788",
2789 )
2790 .await;
2791
2792 assert_eq!(
2793 parsed.children,
2794 vec![list_item(
2795 0..96,
2796 1,
2797 Unordered,
2798 vec![
2799 p("This is a list item with two paragraphs.", 4..44),
2800 p("This is the second paragraph in the list item.", 50..97)
2801 ],
2802 ),],
2803 );
2804 }
2805
2806 #[gpui::test]
2807 async fn test_list_item_with_inline_html() {
2808 let parsed = parse(
2809 "\
2810* This is a list item with an inline HTML <sometag>tag</sometag>.
2811",
2812 )
2813 .await;
2814
2815 assert_eq!(
2816 parsed.children,
2817 vec![list_item(
2818 0..67,
2819 1,
2820 Unordered,
2821 vec![p("This is a list item with an inline HTML tag.", 4..44),],
2822 ),],
2823 );
2824 }
2825
2826 #[gpui::test]
2827 async fn test_nested_list_with_paragraph_inside() {
2828 let parsed = parse(
2829 "\
28301. a
2831 1. b
2832 1. c
2833
2834 text
2835
2836 1. d
2837",
2838 )
2839 .await;
2840
2841 assert_eq!(
2842 parsed.children,
2843 vec![
2844 list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
2845 list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
2846 list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
2847 p("text", 32..37),
2848 list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
2849 ],
2850 );
2851 }
2852
2853 #[gpui::test]
2854 async fn test_list_with_leading_text() {
2855 let parsed = parse(
2856 "\
2857* `code`
2858* **bold**
2859* [link](https://example.com)
2860",
2861 )
2862 .await;
2863
2864 assert_eq!(
2865 parsed.children,
2866 vec![
2867 list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
2868 list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
2869 list_item(20..49, 1, Unordered, vec![p("link", 22..49)],),
2870 ],
2871 );
2872 }
2873
2874 #[gpui::test]
2875 async fn test_simple_block_quote() {
2876 let parsed = parse("> Simple block quote with **styled text**").await;
2877
2878 assert_eq!(
2879 parsed.children,
2880 vec![block_quote(
2881 vec![p("Simple block quote with styled text", 2..41)],
2882 0..41
2883 )]
2884 );
2885 }
2886
2887 #[gpui::test]
2888 async fn test_simple_block_quote_with_multiple_lines() {
2889 let parsed = parse(
2890 "\
2891> # Heading
2892> More
2893> text
2894>
2895> More text
2896",
2897 )
2898 .await;
2899
2900 assert_eq!(
2901 parsed.children,
2902 vec![block_quote(
2903 vec![
2904 h1(text("Heading", 4..11), 2..12),
2905 p("More text", 14..26),
2906 p("More text", 30..40)
2907 ],
2908 0..40
2909 )]
2910 );
2911 }
2912
2913 #[gpui::test]
2914 async fn test_nested_block_quote() {
2915 let parsed = parse(
2916 "\
2917> A
2918>
2919> > # B
2920>
2921> C
2922
2923More text
2924",
2925 )
2926 .await;
2927
2928 assert_eq!(
2929 parsed.children,
2930 vec![
2931 block_quote(
2932 vec![
2933 p("A", 2..4),
2934 block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
2935 p("C", 18..20)
2936 ],
2937 0..20
2938 ),
2939 p("More text", 21..31)
2940 ]
2941 );
2942 }
2943
2944 #[gpui::test]
2945 async fn test_code_block() {
2946 let parsed = parse(
2947 "\
2948```
2949fn main() {
2950 return 0;
2951}
2952```
2953",
2954 )
2955 .await;
2956
2957 assert_eq!(
2958 parsed.children,
2959 vec![code_block(
2960 None,
2961 "fn main() {\n return 0;\n}",
2962 0..35,
2963 None
2964 )]
2965 );
2966 }
2967
2968 #[gpui::test]
2969 async fn test_code_block_with_language(executor: BackgroundExecutor) {
2970 let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
2971 language_registry.add(rust_lang());
2972
2973 let parsed = parse_markdown(
2974 "\
2975```rust
2976fn main() {
2977 return 0;
2978}
2979```
2980",
2981 None,
2982 Some(language_registry),
2983 )
2984 .await;
2985
2986 assert_eq!(
2987 parsed.children,
2988 vec![code_block(
2989 Some("rust".to_string()),
2990 "fn main() {\n return 0;\n}",
2991 0..39,
2992 Some(vec![])
2993 )]
2994 );
2995 }
2996
2997 fn rust_lang() -> Arc<Language> {
2998 Arc::new(Language::new(
2999 LanguageConfig {
3000 name: "Rust".into(),
3001 matcher: LanguageMatcher {
3002 path_suffixes: vec!["rs".into()],
3003 ..Default::default()
3004 },
3005 collapsed_placeholder: " /* ... */ ".to_string(),
3006 ..Default::default()
3007 },
3008 Some(tree_sitter_rust::LANGUAGE.into()),
3009 ))
3010 }
3011
3012 fn h1(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
3013 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
3014 source_range,
3015 level: HeadingLevel::H1,
3016 contents,
3017 })
3018 }
3019
3020 fn h2(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
3021 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
3022 source_range,
3023 level: HeadingLevel::H2,
3024 contents,
3025 })
3026 }
3027
3028 fn h3(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
3029 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
3030 source_range,
3031 level: HeadingLevel::H3,
3032 contents,
3033 })
3034 }
3035
3036 fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
3037 ParsedMarkdownElement::Paragraph(text(contents, source_range))
3038 }
3039
3040 fn text(contents: &str, source_range: Range<usize>) -> MarkdownParagraph {
3041 vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
3042 highlights: Vec::new(),
3043 region_ranges: Vec::new(),
3044 regions: Vec::new(),
3045 source_range,
3046 contents: contents.to_string().into(),
3047 })]
3048 }
3049
3050 fn block_quote(
3051 children: Vec<ParsedMarkdownElement>,
3052 source_range: Range<usize>,
3053 ) -> ParsedMarkdownElement {
3054 ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
3055 source_range,
3056 children,
3057 })
3058 }
3059
3060 fn code_block(
3061 language: Option<String>,
3062 code: &str,
3063 source_range: Range<usize>,
3064 highlights: Option<Vec<(Range<usize>, HighlightId)>>,
3065 ) -> ParsedMarkdownElement {
3066 ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
3067 source_range,
3068 language,
3069 contents: code.to_string().into(),
3070 highlights,
3071 })
3072 }
3073
3074 fn list_item(
3075 source_range: Range<usize>,
3076 depth: u16,
3077 item_type: ParsedMarkdownListItemType,
3078 content: Vec<ParsedMarkdownElement>,
3079 ) -> ParsedMarkdownElement {
3080 ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
3081 source_range,
3082 item_type,
3083 depth,
3084 content,
3085 nested: false,
3086 })
3087 }
3088
3089 fn nested_list_item(
3090 source_range: Range<usize>,
3091 depth: u16,
3092 item_type: ParsedMarkdownListItemType,
3093 content: Vec<ParsedMarkdownElement>,
3094 ) -> ParsedMarkdownElement {
3095 ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
3096 source_range,
3097 item_type,
3098 depth,
3099 content,
3100 nested: true,
3101 })
3102 }
3103
3104 fn table(
3105 source_range: Range<usize>,
3106 caption: Option<MarkdownParagraph>,
3107 header: Vec<ParsedMarkdownTableRow>,
3108 body: Vec<ParsedMarkdownTableRow>,
3109 ) -> ParsedMarkdownTable {
3110 ParsedMarkdownTable {
3111 source_range,
3112 header,
3113 body,
3114 caption,
3115 }
3116 }
3117
3118 fn row(columns: Vec<ParsedMarkdownTableColumn>) -> ParsedMarkdownTableRow {
3119 ParsedMarkdownTableRow { columns }
3120 }
3121
3122 fn column(
3123 col_span: usize,
3124 row_span: usize,
3125 is_header: bool,
3126 children: MarkdownParagraph,
3127 alignment: ParsedMarkdownTableAlignment,
3128 ) -> ParsedMarkdownTableColumn {
3129 ParsedMarkdownTableColumn {
3130 col_span,
3131 row_span,
3132 is_header,
3133 children,
3134 alignment,
3135 }
3136 }
3137
3138 impl PartialEq for ParsedMarkdownTable {
3139 fn eq(&self, other: &Self) -> bool {
3140 self.source_range == other.source_range
3141 && self.header == other.header
3142 && self.body == other.body
3143 }
3144 }
3145
3146 impl PartialEq for ParsedMarkdownText {
3147 fn eq(&self, other: &Self) -> bool {
3148 self.source_range == other.source_range && self.contents == other.contents
3149 }
3150 }
3151}