1use crate::{
2 markdown_elements::*,
3 markdown_minifier::{Minifier, MinifierOptions},
4};
5use async_recursion::async_recursion;
6use collections::FxHashMap;
7use gpui::{DefiniteLength, FontWeight, px, relative};
8use html5ever::{ParseOpts, local_name, parse_document, tendril::TendrilSink};
9use language::LanguageRegistry;
10use markup5ever_rcdom::RcDom;
11use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
12use std::{
13 cell::RefCell, collections::HashMap, mem, ops::Range, path::PathBuf, rc::Rc, sync::Arc, vec,
14};
15
16pub async fn parse_markdown(
17 markdown_input: &str,
18 file_location_directory: Option<PathBuf>,
19 language_registry: Option<Arc<LanguageRegistry>>,
20) -> ParsedMarkdown {
21 let mut options = Options::all();
22 options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
23
24 let parser = Parser::new_ext(markdown_input, options);
25 let parser = MarkdownParser::new(
26 parser.into_offset_iter().collect(),
27 file_location_directory,
28 language_registry,
29 );
30 let renderer = parser.parse_document().await;
31 ParsedMarkdown {
32 children: renderer.parsed,
33 }
34}
35
36fn cleanup_html(source: &str) -> Vec<u8> {
37 let mut writer = std::io::Cursor::new(Vec::new());
38 let mut reader = std::io::Cursor::new(source);
39 let mut minify = Minifier::new(
40 &mut writer,
41 MinifierOptions {
42 omit_doctype: true,
43 collapse_whitespace: true,
44 ..Default::default()
45 },
46 );
47 if let Ok(()) = minify.minify(&mut reader) {
48 writer.into_inner()
49 } else {
50 source.bytes().collect()
51 }
52}
53
/// Cursor-based parser over a pre-collected pulldown-cmark event stream.
struct MarkdownParser<'a> {
    /// Every markdown event paired with its offset range in the source text
    tokens: Vec<(Event<'a>, Range<usize>)>,
    /// The current index in the tokens array
    cursor: usize,
    /// The blocks that we have successfully parsed so far
    parsed: Vec<ParsedMarkdownElement>,
    /// Directory passed to `Link::identify` / `Image::identify` when resolving destinations
    file_location_directory: Option<PathBuf>,
    /// Registry used to look up a language for highlighting fenced code blocks
    language_registry: Option<Arc<LanguageRegistry>>,
}
63
/// Context threaded through the recursive HTML node parsing.
#[derive(Debug)]
struct ParseHtmlNodeContext {
    /// Depth given to list items produced from the next `<ul>`/`<ol>` that is encountered
    list_item_depth: u16,
}

impl Default for ParseHtmlNodeContext {
    /// Starts with a list item depth of 1.
    fn default() -> Self {
        Self { list_item_depth: 1 }
    }
}
74
/// A list item that is still being assembled while `parse_list` walks the events.
struct MarkdownListItem {
    /// Blocks collected for this item so far
    content: Vec<ParsedMarkdownElement>,
    /// Marker kind of the item (unordered, ordered or task)
    item_type: ParsedMarkdownListItemType,
}

impl Default for MarkdownListItem {
    /// An empty, unordered item.
    fn default() -> Self {
        Self {
            content: Vec::new(),
            item_type: ParsedMarkdownListItemType::Unordered,
        }
    }
}
88
89impl<'a> MarkdownParser<'a> {
90 fn new(
91 tokens: Vec<(Event<'a>, Range<usize>)>,
92 file_location_directory: Option<PathBuf>,
93 language_registry: Option<Arc<LanguageRegistry>>,
94 ) -> Self {
95 Self {
96 tokens,
97 file_location_directory,
98 language_registry,
99 cursor: 0,
100 parsed: vec![],
101 }
102 }
103
104 fn eof(&self) -> bool {
105 if self.tokens.is_empty() {
106 return true;
107 }
108 self.cursor >= self.tokens.len() - 1
109 }
110
111 fn peek(&self, steps: usize) -> Option<&(Event<'_>, Range<usize>)> {
112 if self.eof() || (steps + self.cursor) >= self.tokens.len() {
113 return self.tokens.last();
114 }
115 self.tokens.get(self.cursor + steps)
116 }
117
118 fn previous(&self) -> Option<&(Event<'_>, Range<usize>)> {
119 if self.cursor == 0 || self.cursor > self.tokens.len() {
120 return None;
121 }
122 self.tokens.get(self.cursor - 1)
123 }
124
    /// Returns the token under the cursor. This is `peek(0)`, so once `eof()`
    /// holds it yields the last token (or `None` when there are no tokens).
    fn current(&self) -> Option<&(Event<'_>, Range<usize>)> {
        self.peek(0)
    }
128
129 fn current_event(&self) -> Option<&Event<'_>> {
130 self.current().map(|(event, _)| event)
131 }
132
133 fn is_text_like(event: &Event) -> bool {
134 match event {
135 Event::Text(_)
136 // Represent an inline code block
137 | Event::Code(_)
138 | Event::Html(_)
139 | Event::InlineHtml(_)
140 | Event::FootnoteReference(_)
141 | Event::Start(Tag::Link { .. })
142 | Event::Start(Tag::Emphasis)
143 | Event::Start(Tag::Strong)
144 | Event::Start(Tag::Strikethrough)
145 | Event::Start(Tag::Image { .. }) => {
146 true
147 }
148 _ => false,
149 }
150 }
151
152 async fn parse_document(mut self) -> Self {
153 while !self.eof() {
154 if let Some(block) = self.parse_block().await {
155 self.parsed.extend(block);
156 } else {
157 self.cursor += 1;
158 }
159 }
160 self
161 }
162
163 #[async_recursion]
164 async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
165 let (current, source_range) = self.current().unwrap();
166 let source_range = source_range.clone();
167 match current {
168 Event::Start(tag) => match tag {
169 Tag::Paragraph => {
170 self.cursor += 1;
171 let text = self.parse_text(false, Some(source_range));
172 Some(vec![ParsedMarkdownElement::Paragraph(text)])
173 }
174 Tag::Heading { level, .. } => {
175 let level = *level;
176 self.cursor += 1;
177 let heading = self.parse_heading(level);
178 Some(vec![ParsedMarkdownElement::Heading(heading)])
179 }
180 Tag::Table(alignment) => {
181 let alignment = alignment.clone();
182 self.cursor += 1;
183 let table = self.parse_table(alignment);
184 Some(vec![ParsedMarkdownElement::Table(table)])
185 }
186 Tag::List(order) => {
187 let order = *order;
188 self.cursor += 1;
189 let list = self.parse_list(order).await;
190 Some(list)
191 }
192 Tag::BlockQuote(_kind) => {
193 self.cursor += 1;
194 let block_quote = self.parse_block_quote().await;
195 Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
196 }
197 Tag::CodeBlock(kind) => {
198 let language = match kind {
199 pulldown_cmark::CodeBlockKind::Indented => None,
200 pulldown_cmark::CodeBlockKind::Fenced(language) => {
201 if language.is_empty() {
202 None
203 } else {
204 Some(language.to_string())
205 }
206 }
207 };
208
209 self.cursor += 1;
210
211 let code_block = self.parse_code_block(language).await?;
212 Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
213 }
214 Tag::HtmlBlock => {
215 self.cursor += 1;
216
217 Some(self.parse_html_block().await)
218 }
219 _ => None,
220 },
221 Event::Rule => {
222 self.cursor += 1;
223 Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
224 }
225 _ => None,
226 }
227 }
228
229 fn parse_text(
230 &mut self,
231 should_complete_on_soft_break: bool,
232 source_range: Option<Range<usize>>,
233 ) -> MarkdownParagraph {
234 let source_range = source_range.unwrap_or_else(|| {
235 self.current()
236 .map(|(_, range)| range.clone())
237 .unwrap_or_default()
238 });
239
240 let mut markdown_text_like = Vec::new();
241 let mut text = String::new();
242 let mut bold_depth = 0;
243 let mut italic_depth = 0;
244 let mut strikethrough_depth = 0;
245 let mut link: Option<Link> = None;
246 let mut image: Option<Image> = None;
247 let mut region_ranges: Vec<Range<usize>> = vec![];
248 let mut regions: Vec<ParsedRegion> = vec![];
249 let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
250 let mut link_urls: Vec<String> = vec![];
251 let mut link_ranges: Vec<Range<usize>> = vec![];
252
253 loop {
254 if self.eof() {
255 break;
256 }
257
258 let (current, _) = self.current().unwrap();
259 let prev_len = text.len();
260 match current {
261 Event::SoftBreak => {
262 if should_complete_on_soft_break {
263 break;
264 }
265 text.push(' ');
266 }
267
268 Event::HardBreak => {
269 text.push('\n');
270 }
271
272 // We want to ignore any inline HTML tags in the text but keep
273 // the text between them
274 Event::InlineHtml(_) => {}
275
276 Event::Text(t) => {
277 text.push_str(t.as_ref());
278 let mut style = MarkdownHighlightStyle::default();
279
280 if bold_depth > 0 {
281 style.weight = FontWeight::BOLD;
282 }
283
284 if italic_depth > 0 {
285 style.italic = true;
286 }
287
288 if strikethrough_depth > 0 {
289 style.strikethrough = true;
290 }
291
292 let last_run_len = if let Some(link) = link.clone() {
293 region_ranges.push(prev_len..text.len());
294 regions.push(ParsedRegion {
295 code: false,
296 link: Some(link),
297 });
298 style.link = true;
299 prev_len
300 } else {
301 // Manually scan for links
302 let mut finder = linkify::LinkFinder::new();
303 finder.kinds(&[linkify::LinkKind::Url]);
304 let mut last_link_len = prev_len;
305 for link in finder.links(t) {
306 let start = prev_len + link.start();
307 let end = prev_len + link.end();
308 let range = start..end;
309 link_ranges.push(range.clone());
310 link_urls.push(link.as_str().to_string());
311
312 // If there is a style before we match a link, we have to add this to the highlighted ranges
313 if style != MarkdownHighlightStyle::default() && last_link_len < start {
314 highlights.push((
315 last_link_len..start,
316 MarkdownHighlight::Style(style.clone()),
317 ));
318 }
319
320 highlights.push((
321 range.clone(),
322 MarkdownHighlight::Style(MarkdownHighlightStyle {
323 underline: true,
324 ..style
325 }),
326 ));
327 region_ranges.push(range.clone());
328 regions.push(ParsedRegion {
329 code: false,
330 link: Some(Link::Web {
331 url: link.as_str().to_string(),
332 }),
333 });
334 last_link_len = end;
335 }
336 last_link_len
337 };
338
339 if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
340 let mut new_highlight = true;
341 if let Some((last_range, last_style)) = highlights.last_mut()
342 && last_range.end == last_run_len
343 && last_style == &MarkdownHighlight::Style(style.clone())
344 {
345 last_range.end = text.len();
346 new_highlight = false;
347 }
348 if new_highlight {
349 highlights.push((
350 last_run_len..text.len(),
351 MarkdownHighlight::Style(style.clone()),
352 ));
353 }
354 }
355 }
356 Event::Code(t) => {
357 text.push_str(t.as_ref());
358 region_ranges.push(prev_len..text.len());
359
360 if link.is_some() {
361 highlights.push((
362 prev_len..text.len(),
363 MarkdownHighlight::Style(MarkdownHighlightStyle {
364 link: true,
365 ..Default::default()
366 }),
367 ));
368 }
369 regions.push(ParsedRegion {
370 code: true,
371 link: link.clone(),
372 });
373 }
374 Event::Start(tag) => match tag {
375 Tag::Emphasis => italic_depth += 1,
376 Tag::Strong => bold_depth += 1,
377 Tag::Strikethrough => strikethrough_depth += 1,
378 Tag::Link { dest_url, .. } => {
379 link = Link::identify(
380 self.file_location_directory.clone(),
381 dest_url.to_string(),
382 );
383 }
384 Tag::Image { dest_url, .. } => {
385 if !text.is_empty() {
386 let parsed_regions = MarkdownParagraphChunk::Text(ParsedMarkdownText {
387 source_range: source_range.clone(),
388 contents: mem::take(&mut text).into(),
389 highlights: mem::take(&mut highlights),
390 region_ranges: mem::take(&mut region_ranges),
391 regions: mem::take(&mut regions),
392 });
393 markdown_text_like.push(parsed_regions);
394 }
395 image = Image::identify(
396 dest_url.to_string(),
397 source_range.clone(),
398 self.file_location_directory.clone(),
399 );
400 }
401 _ => {
402 break;
403 }
404 },
405
406 Event::End(tag) => match tag {
407 TagEnd::Emphasis => italic_depth -= 1,
408 TagEnd::Strong => bold_depth -= 1,
409 TagEnd::Strikethrough => strikethrough_depth -= 1,
410 TagEnd::Link => {
411 link = None;
412 }
413 TagEnd::Image => {
414 if let Some(mut image) = image.take() {
415 if !text.is_empty() {
416 image.set_alt_text(std::mem::take(&mut text).into());
417 mem::take(&mut highlights);
418 mem::take(&mut region_ranges);
419 mem::take(&mut regions);
420 }
421 markdown_text_like.push(MarkdownParagraphChunk::Image(image));
422 }
423 }
424 TagEnd::Paragraph => {
425 self.cursor += 1;
426 break;
427 }
428 _ => {
429 break;
430 }
431 },
432 _ => {
433 break;
434 }
435 }
436
437 self.cursor += 1;
438 }
439 if !text.is_empty() {
440 markdown_text_like.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
441 source_range,
442 contents: text.into(),
443 highlights,
444 regions,
445 region_ranges,
446 }));
447 }
448 markdown_text_like
449 }
450
451 fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
452 let (_event, source_range) = self.previous().unwrap();
453 let source_range = source_range.clone();
454 let text = self.parse_text(true, None);
455
456 // Advance past the heading end tag
457 self.cursor += 1;
458
459 ParsedMarkdownHeading {
460 source_range,
461 level: match level {
462 pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
463 pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
464 pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
465 pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
466 pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
467 pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
468 },
469 contents: text,
470 }
471 }
472
473 fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
474 let (_event, source_range) = self.previous().unwrap();
475 let source_range = source_range.clone();
476 let mut header = vec![];
477 let mut body = vec![];
478 let mut row_columns = vec![];
479 let mut in_header = true;
480 let column_alignments = alignment
481 .iter()
482 .map(Self::convert_alignment)
483 .collect::<Vec<_>>();
484
485 loop {
486 if self.eof() {
487 break;
488 }
489
490 let (current, source_range) = self.current().unwrap();
491 let source_range = source_range.clone();
492 match current {
493 Event::Start(Tag::TableHead)
494 | Event::Start(Tag::TableRow)
495 | Event::End(TagEnd::TableCell) => {
496 self.cursor += 1;
497 }
498 Event::Start(Tag::TableCell) => {
499 self.cursor += 1;
500 let cell_contents = self.parse_text(false, Some(source_range));
501 row_columns.push(ParsedMarkdownTableColumn {
502 col_span: 1,
503 row_span: 1,
504 is_header: in_header,
505 children: cell_contents,
506 alignment: column_alignments
507 .get(row_columns.len())
508 .copied()
509 .unwrap_or_default(),
510 });
511 }
512 Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
513 self.cursor += 1;
514 let columns = std::mem::take(&mut row_columns);
515 if in_header {
516 header.push(ParsedMarkdownTableRow { columns: columns });
517 in_header = false;
518 } else {
519 body.push(ParsedMarkdownTableRow::with_columns(columns));
520 }
521 }
522 Event::End(TagEnd::Table) => {
523 self.cursor += 1;
524 break;
525 }
526 _ => {
527 break;
528 }
529 }
530 }
531
532 ParsedMarkdownTable {
533 source_range,
534 header,
535 body,
536 caption: None,
537 }
538 }
539
540 fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
541 match alignment {
542 Alignment::None => ParsedMarkdownTableAlignment::None,
543 Alignment::Left => ParsedMarkdownTableAlignment::Left,
544 Alignment::Center => ParsedMarkdownTableAlignment::Center,
545 Alignment::Right => ParsedMarkdownTableAlignment::Right,
546 }
547 }
548
549 async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
550 let (_, list_source_range) = self.previous().unwrap();
551
552 let mut items = Vec::new();
553 let mut items_stack = vec![MarkdownListItem::default()];
554 let mut depth = 1;
555 let mut order = order;
556 let mut order_stack = Vec::new();
557
558 let mut insertion_indices = FxHashMap::default();
559 let mut source_ranges = FxHashMap::default();
560 let mut start_item_range = list_source_range.clone();
561
562 while !self.eof() {
563 let (current, source_range) = self.current().unwrap();
564 match current {
565 Event::Start(Tag::List(new_order)) => {
566 if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
567 insertion_indices.insert(depth, items.len());
568 }
569
570 // We will use the start of the nested list as the end for the current item's range,
571 // because we don't care about the hierarchy of list items
572 if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
573 e.insert(start_item_range.start..source_range.start);
574 }
575
576 order_stack.push(order);
577 order = *new_order;
578 self.cursor += 1;
579 depth += 1;
580 }
581 Event::End(TagEnd::List(_)) => {
582 order = order_stack.pop().flatten();
583 self.cursor += 1;
584 depth -= 1;
585
586 if depth == 0 {
587 break;
588 }
589 }
590 Event::Start(Tag::Item) => {
591 start_item_range = source_range.clone();
592
593 self.cursor += 1;
594 items_stack.push(MarkdownListItem::default());
595
596 let mut task_list = None;
597 // Check for task list marker (`- [ ]` or `- [x]`)
598 if let Some(event) = self.current_event() {
599 // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
600 if event == &Event::Start(Tag::Paragraph) {
601 self.cursor += 1;
602 }
603
604 if let Some((Event::TaskListMarker(checked), range)) = self.current() {
605 task_list = Some((*checked, range.clone()));
606 self.cursor += 1;
607 }
608 }
609
610 if let Some((event, range)) = self.current() {
611 // This is a plain list item.
612 // For example `- some text` or `1. [Docs](./docs.md)`
613 if MarkdownParser::is_text_like(event) {
614 let text = self.parse_text(false, Some(range.clone()));
615 let block = ParsedMarkdownElement::Paragraph(text);
616 if let Some(content) = items_stack.last_mut() {
617 let item_type = if let Some((checked, range)) = task_list {
618 ParsedMarkdownListItemType::Task(checked, range)
619 } else if let Some(order) = order {
620 ParsedMarkdownListItemType::Ordered(order)
621 } else {
622 ParsedMarkdownListItemType::Unordered
623 };
624 content.item_type = item_type;
625 content.content.push(block);
626 }
627 } else {
628 let block = self.parse_block().await;
629 if let Some(block) = block
630 && let Some(list_item) = items_stack.last_mut()
631 {
632 list_item.content.extend(block);
633 }
634 }
635 }
636
637 // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
638 if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
639 self.cursor += 1;
640 }
641 }
642 Event::End(TagEnd::Item) => {
643 self.cursor += 1;
644
645 if let Some(current) = order {
646 order = Some(current + 1);
647 }
648
649 if let Some(list_item) = items_stack.pop() {
650 let source_range = source_ranges
651 .remove(&depth)
652 .unwrap_or(start_item_range.clone());
653
654 // We need to remove the last character of the source range, because it includes the newline character
655 let source_range = source_range.start..source_range.end - 1;
656 let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
657 source_range,
658 content: list_item.content,
659 depth,
660 item_type: list_item.item_type,
661 nested: false,
662 });
663
664 if let Some(index) = insertion_indices.get(&depth) {
665 items.insert(*index, item);
666 insertion_indices.remove(&depth);
667 } else {
668 items.push(item);
669 }
670 }
671 }
672 _ => {
673 if depth == 0 {
674 break;
675 }
676 // This can only happen if a list item starts with more then one paragraph,
677 // or the list item contains blocks that should be rendered after the nested list items
678 let block = self.parse_block().await;
679 if let Some(block) = block {
680 if let Some(list_item) = items_stack.last_mut() {
681 // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
682 if !insertion_indices.contains_key(&depth) {
683 list_item.content.extend(block);
684 continue;
685 }
686 }
687
688 // Otherwise we need to insert the block after all the nested items
689 // that have been parsed so far
690 items.extend(block);
691 } else {
692 self.cursor += 1;
693 }
694 }
695 }
696 }
697
698 items
699 }
700
701 #[async_recursion]
702 async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
703 let (_event, source_range) = self.previous().unwrap();
704 let source_range = source_range.clone();
705 let mut nested_depth = 1;
706
707 let mut children: Vec<ParsedMarkdownElement> = vec![];
708
709 while !self.eof() {
710 let block = self.parse_block().await;
711
712 if let Some(block) = block {
713 children.extend(block);
714 } else {
715 break;
716 }
717
718 if self.eof() {
719 break;
720 }
721
722 let (current, _source_range) = self.current().unwrap();
723 match current {
724 // This is a nested block quote.
725 // Record that we're in a nested block quote and continue parsing.
726 // We don't need to advance the cursor since the next
727 // call to `parse_block` will handle it.
728 Event::Start(Tag::BlockQuote(_kind)) => {
729 nested_depth += 1;
730 }
731 Event::End(TagEnd::BlockQuote(_kind)) => {
732 nested_depth -= 1;
733 if nested_depth == 0 {
734 self.cursor += 1;
735 break;
736 }
737 }
738 _ => {}
739 };
740 }
741
742 ParsedMarkdownBlockQuote {
743 source_range,
744 children,
745 }
746 }
747
748 async fn parse_code_block(
749 &mut self,
750 language: Option<String>,
751 ) -> Option<ParsedMarkdownCodeBlock> {
752 let Some((_event, source_range)) = self.previous() else {
753 return None;
754 };
755
756 let source_range = source_range.clone();
757 let mut code = String::new();
758
759 while !self.eof() {
760 let Some((current, _source_range)) = self.current() else {
761 break;
762 };
763
764 match current {
765 Event::Text(text) => {
766 code.push_str(text);
767 self.cursor += 1;
768 }
769 Event::End(TagEnd::CodeBlock) => {
770 self.cursor += 1;
771 break;
772 }
773 _ => {
774 break;
775 }
776 }
777 }
778
779 code = code.strip_suffix('\n').unwrap_or(&code).to_string();
780
781 let highlights = if let Some(language) = &language {
782 if let Some(registry) = &self.language_registry {
783 let rope: language::Rope = code.as_str().into();
784 registry
785 .language_for_name_or_extension(language)
786 .await
787 .map(|l| l.highlight_text(&rope, 0..code.len()))
788 .ok()
789 } else {
790 None
791 }
792 } else {
793 None
794 };
795
796 Some(ParsedMarkdownCodeBlock {
797 source_range,
798 contents: code.into(),
799 language,
800 highlights,
801 })
802 }
803
804 async fn parse_html_block(&mut self) -> Vec<ParsedMarkdownElement> {
805 let mut elements = Vec::new();
806 let Some((_event, _source_range)) = self.previous() else {
807 return elements;
808 };
809
810 let mut html_source_range_start = None;
811 let mut html_source_range_end = None;
812 let mut html_buffer = String::new();
813
814 while !self.eof() {
815 let Some((current, source_range)) = self.current() else {
816 break;
817 };
818 let source_range = source_range.clone();
819 match current {
820 Event::Html(html) => {
821 html_source_range_start.get_or_insert(source_range.start);
822 html_source_range_end = Some(source_range.end);
823 html_buffer.push_str(html);
824 self.cursor += 1;
825 }
826 Event::End(TagEnd::CodeBlock) => {
827 self.cursor += 1;
828 break;
829 }
830 _ => {
831 break;
832 }
833 }
834 }
835
836 let bytes = cleanup_html(&html_buffer);
837
838 let mut cursor = std::io::Cursor::new(bytes);
839 if let Ok(dom) = parse_document(RcDom::default(), ParseOpts::default())
840 .from_utf8()
841 .read_from(&mut cursor)
842 && let Some((start, end)) = html_source_range_start.zip(html_source_range_end)
843 {
844 self.parse_html_node(
845 start..end,
846 &dom.document,
847 &mut elements,
848 &ParseHtmlNodeContext::default(),
849 );
850 }
851
852 elements
853 }
854
855 fn parse_html_node(
856 &self,
857 source_range: Range<usize>,
858 node: &Rc<markup5ever_rcdom::Node>,
859 elements: &mut Vec<ParsedMarkdownElement>,
860 context: &ParseHtmlNodeContext,
861 ) {
862 match &node.data {
863 markup5ever_rcdom::NodeData::Document => {
864 self.consume_children(source_range, node, elements, context);
865 }
866 markup5ever_rcdom::NodeData::Text { contents } => {
867 elements.push(ParsedMarkdownElement::Paragraph(vec![
868 MarkdownParagraphChunk::Text(ParsedMarkdownText {
869 source_range,
870 regions: Vec::default(),
871 region_ranges: Vec::default(),
872 highlights: Vec::default(),
873 contents: contents.borrow().to_string().into(),
874 }),
875 ]));
876 }
877 markup5ever_rcdom::NodeData::Comment { .. } => {}
878 markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
879 if local_name!("img") == name.local {
880 if let Some(image) = self.extract_image(source_range, attrs) {
881 elements.push(ParsedMarkdownElement::Image(image));
882 }
883 } else if local_name!("p") == name.local {
884 let mut paragraph = MarkdownParagraph::new();
885 self.parse_paragraph(source_range, node, &mut paragraph);
886
887 if !paragraph.is_empty() {
888 elements.push(ParsedMarkdownElement::Paragraph(paragraph));
889 }
890 } else if matches!(
891 name.local,
892 local_name!("h1")
893 | local_name!("h2")
894 | local_name!("h3")
895 | local_name!("h4")
896 | local_name!("h5")
897 | local_name!("h6")
898 ) {
899 let mut paragraph = MarkdownParagraph::new();
900 self.consume_paragraph(source_range.clone(), node, &mut paragraph);
901
902 if !paragraph.is_empty() {
903 elements.push(ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
904 source_range,
905 level: match name.local {
906 local_name!("h1") => HeadingLevel::H1,
907 local_name!("h2") => HeadingLevel::H2,
908 local_name!("h3") => HeadingLevel::H3,
909 local_name!("h4") => HeadingLevel::H4,
910 local_name!("h5") => HeadingLevel::H5,
911 local_name!("h6") => HeadingLevel::H6,
912 _ => unreachable!(),
913 },
914 contents: paragraph,
915 }));
916 }
917 } else if local_name!("ul") == name.local || local_name!("ol") == name.local {
918 if let Some(list_items) = self.extract_html_list(
919 node,
920 local_name!("ol") == name.local,
921 context.list_item_depth,
922 source_range,
923 ) {
924 elements.extend(list_items);
925 }
926 } else if local_name!("blockquote") == name.local {
927 if let Some(blockquote) = self.extract_html_blockquote(node, source_range) {
928 elements.push(ParsedMarkdownElement::BlockQuote(blockquote));
929 }
930 } else if local_name!("table") == name.local {
931 if let Some(table) = self.extract_html_table(node, source_range) {
932 elements.push(ParsedMarkdownElement::Table(table));
933 }
934 } else {
935 self.consume_children(source_range, node, elements, context);
936 }
937 }
938 _ => {}
939 }
940 }
941
942 fn parse_paragraph(
943 &self,
944 source_range: Range<usize>,
945 node: &Rc<markup5ever_rcdom::Node>,
946 paragraph: &mut MarkdownParagraph,
947 ) {
948 match &node.data {
949 markup5ever_rcdom::NodeData::Text { contents } => {
950 paragraph.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
951 source_range,
952 regions: Vec::default(),
953 region_ranges: Vec::default(),
954 highlights: Vec::default(),
955 contents: contents.borrow().to_string().into(),
956 }));
957 }
958 markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
959 if local_name!("img") == name.local {
960 if let Some(image) = self.extract_image(source_range, attrs) {
961 paragraph.push(MarkdownParagraphChunk::Image(image));
962 }
963 } else {
964 self.consume_paragraph(source_range, node, paragraph);
965 }
966 }
967 _ => {}
968 }
969 }
970
971 fn consume_paragraph(
972 &self,
973 source_range: Range<usize>,
974 node: &Rc<markup5ever_rcdom::Node>,
975 paragraph: &mut MarkdownParagraph,
976 ) {
977 for node in node.children.borrow().iter() {
978 self.parse_paragraph(source_range.clone(), node, paragraph);
979 }
980 }
981
982 fn parse_table_row(
983 &self,
984 source_range: Range<usize>,
985 node: &Rc<markup5ever_rcdom::Node>,
986 ) -> Option<ParsedMarkdownTableRow> {
987 let mut columns = Vec::new();
988
989 match &node.data {
990 markup5ever_rcdom::NodeData::Element { name, .. } => {
991 if local_name!("tr") != name.local {
992 return None;
993 }
994
995 for node in node.children.borrow().iter() {
996 if let Some(column) = self.parse_table_column(source_range.clone(), node) {
997 columns.push(column);
998 }
999 }
1000 }
1001 _ => {}
1002 }
1003
1004 if columns.is_empty() {
1005 None
1006 } else {
1007 Some(ParsedMarkdownTableRow { columns })
1008 }
1009 }
1010
1011 fn parse_table_column(
1012 &self,
1013 source_range: Range<usize>,
1014 node: &Rc<markup5ever_rcdom::Node>,
1015 ) -> Option<ParsedMarkdownTableColumn> {
1016 match &node.data {
1017 markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
1018 if !matches!(name.local, local_name!("th") | local_name!("td")) {
1019 return None;
1020 }
1021
1022 let mut children = MarkdownParagraph::new();
1023 self.consume_paragraph(source_range, node, &mut children);
1024
1025 let is_header = matches!(name.local, local_name!("th"));
1026
1027 Some(ParsedMarkdownTableColumn {
1028 col_span: std::cmp::max(
1029 Self::attr_value(attrs, local_name!("colspan"))
1030 .and_then(|span| span.parse().ok())
1031 .unwrap_or(1),
1032 1,
1033 ),
1034 row_span: std::cmp::max(
1035 Self::attr_value(attrs, local_name!("rowspan"))
1036 .and_then(|span| span.parse().ok())
1037 .unwrap_or(1),
1038 1,
1039 ),
1040 is_header,
1041 children,
1042 alignment: Self::attr_value(attrs, local_name!("align"))
1043 .and_then(|align| match align.as_str() {
1044 "left" => Some(ParsedMarkdownTableAlignment::Left),
1045 "center" => Some(ParsedMarkdownTableAlignment::Center),
1046 "right" => Some(ParsedMarkdownTableAlignment::Right),
1047 _ => None,
1048 })
1049 .unwrap_or_else(|| {
1050 if is_header {
1051 ParsedMarkdownTableAlignment::Center
1052 } else {
1053 ParsedMarkdownTableAlignment::default()
1054 }
1055 }),
1056 })
1057 }
1058 _ => None,
1059 }
1060 }
1061
1062 fn consume_children(
1063 &self,
1064 source_range: Range<usize>,
1065 node: &Rc<markup5ever_rcdom::Node>,
1066 elements: &mut Vec<ParsedMarkdownElement>,
1067 context: &ParseHtmlNodeContext,
1068 ) {
1069 for node in node.children.borrow().iter() {
1070 self.parse_html_node(source_range.clone(), node, elements, context);
1071 }
1072 }
1073
1074 fn attr_value(
1075 attrs: &RefCell<Vec<html5ever::Attribute>>,
1076 name: html5ever::LocalName,
1077 ) -> Option<String> {
1078 attrs.borrow().iter().find_map(|attr| {
1079 if attr.name.local == name {
1080 Some(attr.value.to_string())
1081 } else {
1082 None
1083 }
1084 })
1085 }
1086
1087 fn extract_styles_from_attributes(
1088 attrs: &RefCell<Vec<html5ever::Attribute>>,
1089 ) -> HashMap<String, String> {
1090 let mut styles = HashMap::new();
1091
1092 if let Some(style) = Self::attr_value(attrs, local_name!("style")) {
1093 for decl in style.split(';') {
1094 let mut parts = decl.splitn(2, ':');
1095 if let Some((key, value)) = parts.next().zip(parts.next()) {
1096 styles.insert(
1097 key.trim().to_lowercase().to_string(),
1098 value.trim().to_string(),
1099 );
1100 }
1101 }
1102 }
1103
1104 styles
1105 }
1106
1107 fn extract_image(
1108 &self,
1109 source_range: Range<usize>,
1110 attrs: &RefCell<Vec<html5ever::Attribute>>,
1111 ) -> Option<Image> {
1112 let src = Self::attr_value(attrs, local_name!("src"))?;
1113
1114 let mut image = Image::identify(src, source_range, self.file_location_directory.clone())?;
1115
1116 if let Some(alt) = Self::attr_value(attrs, local_name!("alt")) {
1117 image.set_alt_text(alt.into());
1118 }
1119
1120 let styles = Self::extract_styles_from_attributes(attrs);
1121
1122 if let Some(width) = Self::attr_value(attrs, local_name!("width"))
1123 .or_else(|| styles.get("width").cloned())
1124 .and_then(|width| Self::parse_html_element_dimension(&width))
1125 {
1126 image.set_width(width);
1127 }
1128
1129 if let Some(height) = Self::attr_value(attrs, local_name!("height"))
1130 .or_else(|| styles.get("height").cloned())
1131 .and_then(|height| Self::parse_html_element_dimension(&height))
1132 {
1133 image.set_height(height);
1134 }
1135
1136 Some(image)
1137 }
1138
1139 fn extract_html_list(
1140 &self,
1141 node: &Rc<markup5ever_rcdom::Node>,
1142 ordered: bool,
1143 depth: u16,
1144 source_range: Range<usize>,
1145 ) -> Option<Vec<ParsedMarkdownElement>> {
1146 let mut list_items = Vec::with_capacity(node.children.borrow().len());
1147
1148 for (index, node) in node.children.borrow().iter().enumerate() {
1149 match &node.data {
1150 markup5ever_rcdom::NodeData::Element { name, .. } => {
1151 if local_name!("li") != name.local {
1152 continue;
1153 }
1154
1155 let mut content = Vec::new();
1156 self.consume_children(
1157 source_range.clone(),
1158 node,
1159 &mut content,
1160 &ParseHtmlNodeContext {
1161 list_item_depth: depth + 1,
1162 },
1163 );
1164
1165 if !content.is_empty() {
1166 list_items.push(ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
1167 depth,
1168 source_range: source_range.clone(),
1169 item_type: if ordered {
1170 ParsedMarkdownListItemType::Ordered(index as u64 + 1)
1171 } else {
1172 ParsedMarkdownListItemType::Unordered
1173 },
1174 content,
1175 nested: true,
1176 }));
1177 }
1178 }
1179 _ => {}
1180 }
1181 }
1182
1183 if list_items.is_empty() {
1184 None
1185 } else {
1186 Some(list_items)
1187 }
1188 }
1189
1190 fn parse_html_element_dimension(value: &str) -> Option<DefiniteLength> {
1191 if value.ends_with("%") {
1192 value
1193 .trim_end_matches("%")
1194 .parse::<f32>()
1195 .ok()
1196 .map(|value| relative(value / 100.))
1197 } else {
1198 value
1199 .trim_end_matches("px")
1200 .parse()
1201 .ok()
1202 .map(|value| px(value).into())
1203 }
1204 }
1205
1206 fn extract_html_blockquote(
1207 &self,
1208 node: &Rc<markup5ever_rcdom::Node>,
1209 source_range: Range<usize>,
1210 ) -> Option<ParsedMarkdownBlockQuote> {
1211 let mut children = Vec::new();
1212 self.consume_children(
1213 source_range.clone(),
1214 node,
1215 &mut children,
1216 &ParseHtmlNodeContext::default(),
1217 );
1218
1219 if children.is_empty() {
1220 None
1221 } else {
1222 Some(ParsedMarkdownBlockQuote {
1223 children,
1224 source_range,
1225 })
1226 }
1227 }
1228
1229 fn extract_html_table(
1230 &self,
1231 node: &Rc<markup5ever_rcdom::Node>,
1232 source_range: Range<usize>,
1233 ) -> Option<ParsedMarkdownTable> {
1234 let mut header_rows = Vec::new();
1235 let mut body_rows = Vec::new();
1236 let mut caption = None;
1237
1238 // node should be a thead, tbody or caption element
1239 for node in node.children.borrow().iter() {
1240 match &node.data {
1241 markup5ever_rcdom::NodeData::Element { name, .. } => {
1242 if local_name!("caption") == name.local {
1243 let mut paragraph = MarkdownParagraph::new();
1244 self.parse_paragraph(source_range.clone(), node, &mut paragraph);
1245 caption = Some(paragraph);
1246 }
1247 if local_name!("thead") == name.local {
1248 // node should be a tr element
1249 for node in node.children.borrow().iter() {
1250 if let Some(row) = self.parse_table_row(source_range.clone(), node) {
1251 header_rows.push(row);
1252 }
1253 }
1254 } else if local_name!("tbody") == name.local {
1255 // node should be a tr element
1256 for node in node.children.borrow().iter() {
1257 if let Some(row) = self.parse_table_row(source_range.clone(), node) {
1258 body_rows.push(row);
1259 }
1260 }
1261 }
1262 }
1263 _ => {}
1264 }
1265 }
1266
1267 if !header_rows.is_empty() || !body_rows.is_empty() {
1268 Some(ParsedMarkdownTable {
1269 source_range,
1270 body: body_rows,
1271 header: header_rows,
1272 caption,
1273 })
1274 } else {
1275 None
1276 }
1277 }
1278}
1279
1280#[cfg(test)]
1281mod tests {
1282 use super::*;
1283 use ParsedMarkdownListItemType::*;
1284 use core::panic;
1285 use gpui::{AbsoluteLength, BackgroundExecutor, DefiniteLength};
1286 use language::{
1287 HighlightId, Language, LanguageConfig, LanguageMatcher, LanguageRegistry, tree_sitter_rust,
1288 };
1289 use pretty_assertions::assert_eq;
1290
1291 async fn parse(input: &str) -> ParsedMarkdown {
1292 parse_markdown(input, None, None).await
1293 }
1294
1295 #[gpui::test]
1296 async fn test_headings() {
1297 let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
1298
1299 assert_eq!(
1300 parsed.children,
1301 vec![
1302 h1(text("Heading one", 2..13), 0..14),
1303 h2(text("Heading two", 17..28), 14..29),
1304 h3(text("Heading three", 33..46), 29..46),
1305 ]
1306 );
1307 }
1308
1309 #[gpui::test]
1310 async fn test_newlines_dont_new_paragraphs() {
1311 let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
1312
1313 assert_eq!(
1314 parsed.children,
1315 vec![p("Some text that is bolded and italicized", 0..46)]
1316 );
1317 }
1318
1319 #[gpui::test]
1320 async fn test_heading_with_paragraph() {
1321 let parsed = parse("# Zed\nThe editor").await;
1322
1323 assert_eq!(
1324 parsed.children,
1325 vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
1326 );
1327 }
1328
1329 #[gpui::test]
1330 async fn test_double_newlines_do_new_paragraphs() {
1331 let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
1332
1333 assert_eq!(
1334 parsed.children,
1335 vec![
1336 p("Some text that is bolded", 0..29),
1337 p("and italicized", 31..47),
1338 ]
1339 );
1340 }
1341
1342 #[gpui::test]
1343 async fn test_bold_italic_text() {
1344 let parsed = parse("Some text **that is bolded** and *italicized*").await;
1345
1346 assert_eq!(
1347 parsed.children,
1348 vec![p("Some text that is bolded and italicized", 0..45)]
1349 );
1350 }
1351
1352 #[gpui::test]
1353 async fn test_nested_bold_strikethrough_text() {
1354 let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
1355
1356 assert_eq!(parsed.children.len(), 1);
1357 assert_eq!(
1358 parsed.children[0],
1359 ParsedMarkdownElement::Paragraph(vec![MarkdownParagraphChunk::Text(
1360 ParsedMarkdownText {
1361 source_range: 0..35,
1362 contents: "Some bostrikethroughld text".into(),
1363 highlights: Vec::new(),
1364 region_ranges: Vec::new(),
1365 regions: Vec::new(),
1366 }
1367 )])
1368 );
1369
1370 let new_text = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1371 text
1372 } else {
1373 panic!("Expected a paragraph");
1374 };
1375
1376 let paragraph = if let MarkdownParagraphChunk::Text(text) = &new_text[0] {
1377 text
1378 } else {
1379 panic!("Expected a text");
1380 };
1381
1382 assert_eq!(
1383 paragraph.highlights,
1384 vec![
1385 (
1386 5..7,
1387 MarkdownHighlight::Style(MarkdownHighlightStyle {
1388 weight: FontWeight::BOLD,
1389 ..Default::default()
1390 }),
1391 ),
1392 (
1393 7..20,
1394 MarkdownHighlight::Style(MarkdownHighlightStyle {
1395 weight: FontWeight::BOLD,
1396 strikethrough: true,
1397 ..Default::default()
1398 }),
1399 ),
1400 (
1401 20..22,
1402 MarkdownHighlight::Style(MarkdownHighlightStyle {
1403 weight: FontWeight::BOLD,
1404 ..Default::default()
1405 }),
1406 ),
1407 ]
1408 );
1409 }
1410
1411 #[gpui::test]
1412 async fn test_text_with_inline_html() {
1413 let parsed = parse("This is a paragraph with an inline HTML <sometag>tag</sometag>.").await;
1414
1415 assert_eq!(
1416 parsed.children,
1417 vec![p("This is a paragraph with an inline HTML tag.", 0..63),],
1418 );
1419 }
1420
1421 #[gpui::test]
1422 async fn test_raw_links_detection() {
1423 let parsed = parse("Checkout this https://zed.dev link").await;
1424
1425 assert_eq!(
1426 parsed.children,
1427 vec![p("Checkout this https://zed.dev link", 0..34)]
1428 );
1429 }
1430
1431 #[gpui::test]
1432 async fn test_empty_image() {
1433 let parsed = parse("![]()").await;
1434
1435 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1436 text
1437 } else {
1438 panic!("Expected a paragraph");
1439 };
1440 assert_eq!(paragraph.len(), 0);
1441 }
1442
1443 #[gpui::test]
1444 async fn test_image_links_detection() {
1445 let parsed = parse("").await;
1446
1447 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1448 text
1449 } else {
1450 panic!("Expected a paragraph");
1451 };
1452 assert_eq!(
1453 paragraph[0],
1454 MarkdownParagraphChunk::Image(Image {
1455 source_range: 0..111,
1456 link: Link::Web {
1457 url: "https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png".to_string(),
1458 },
1459 alt_text: Some("test".into()),
1460 height: None,
1461 width: None,
1462 },)
1463 );
1464 }
1465
1466 #[gpui::test]
1467 async fn test_image_alt_text() {
1468 let parsed = parse("[](https://zed.dev)\n ").await;
1469
1470 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1471 text
1472 } else {
1473 panic!("Expected a paragraph");
1474 };
1475 assert_eq!(
1476 paragraph[0],
1477 MarkdownParagraphChunk::Image(Image {
1478 source_range: 0..142,
1479 link: Link::Web {
1480 url: "https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/zed-industries/zed/main/assets/badge/v0.json".to_string(),
1481 },
1482 alt_text: Some("Zed".into()),
1483 height: None,
1484 width: None,
1485 },)
1486 );
1487 }
1488
1489 #[gpui::test]
1490 async fn test_image_without_alt_text() {
1491 let parsed = parse("").await;
1492
1493 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1494 text
1495 } else {
1496 panic!("Expected a paragraph");
1497 };
1498 assert_eq!(
1499 paragraph[0],
1500 MarkdownParagraphChunk::Image(Image {
1501 source_range: 0..31,
1502 link: Link::Web {
1503 url: "http://example.com/foo.png".to_string(),
1504 },
1505 alt_text: None,
1506 height: None,
1507 width: None,
1508 },)
1509 );
1510 }
1511
1512 #[gpui::test]
1513 async fn test_image_with_alt_text_containing_formatting() {
1514 let parsed = parse("").await;
1515
1516 let ParsedMarkdownElement::Paragraph(chunks) = &parsed.children[0] else {
1517 panic!("Expected a paragraph");
1518 };
1519 assert_eq!(
1520 chunks,
1521 &[MarkdownParagraphChunk::Image(Image {
1522 source_range: 0..44,
1523 link: Link::Web {
1524 url: "http://example.com/foo.png".to_string(),
1525 },
1526 alt_text: Some("foo bar baz".into()),
1527 height: None,
1528 width: None,
1529 }),],
1530 );
1531 }
1532
1533 #[gpui::test]
1534 async fn test_images_with_text_in_between() {
1535 let parsed = parse(
1536 "\nLorem Ipsum\n",
1537 )
1538 .await;
1539
1540 let chunks = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1541 text
1542 } else {
1543 panic!("Expected a paragraph");
1544 };
1545 assert_eq!(
1546 chunks,
1547 &vec![
1548 MarkdownParagraphChunk::Image(Image {
1549 source_range: 0..81,
1550 link: Link::Web {
1551 url: "http://example.com/foo.png".to_string(),
1552 },
1553 alt_text: Some("foo".into()),
1554 height: None,
1555 width: None,
1556 }),
1557 MarkdownParagraphChunk::Text(ParsedMarkdownText {
1558 source_range: 0..81,
1559 contents: " Lorem Ipsum ".into(),
1560 highlights: Vec::new(),
1561 region_ranges: Vec::new(),
1562 regions: Vec::new(),
1563 }),
1564 MarkdownParagraphChunk::Image(Image {
1565 source_range: 0..81,
1566 link: Link::Web {
1567 url: "http://example.com/bar.png".to_string(),
1568 },
1569 alt_text: Some("bar".into()),
1570 height: None,
1571 width: None,
1572 })
1573 ]
1574 );
1575 }
1576
1577 #[test]
1578 fn test_parse_html_element_dimension() {
1579 // Test percentage values
1580 assert_eq!(
1581 MarkdownParser::parse_html_element_dimension("50%"),
1582 Some(DefiniteLength::Fraction(0.5))
1583 );
1584 assert_eq!(
1585 MarkdownParser::parse_html_element_dimension("100%"),
1586 Some(DefiniteLength::Fraction(1.0))
1587 );
1588 assert_eq!(
1589 MarkdownParser::parse_html_element_dimension("25%"),
1590 Some(DefiniteLength::Fraction(0.25))
1591 );
1592 assert_eq!(
1593 MarkdownParser::parse_html_element_dimension("0%"),
1594 Some(DefiniteLength::Fraction(0.0))
1595 );
1596
1597 // Test pixel values
1598 assert_eq!(
1599 MarkdownParser::parse_html_element_dimension("100px"),
1600 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.0))))
1601 );
1602 assert_eq!(
1603 MarkdownParser::parse_html_element_dimension("50px"),
1604 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(50.0))))
1605 );
1606 assert_eq!(
1607 MarkdownParser::parse_html_element_dimension("0px"),
1608 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(0.0))))
1609 );
1610
1611 // Test values without units (should be treated as pixels)
1612 assert_eq!(
1613 MarkdownParser::parse_html_element_dimension("100"),
1614 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.0))))
1615 );
1616 assert_eq!(
1617 MarkdownParser::parse_html_element_dimension("42"),
1618 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(42.0))))
1619 );
1620
1621 // Test invalid values
1622 assert_eq!(
1623 MarkdownParser::parse_html_element_dimension("invalid"),
1624 None
1625 );
1626 assert_eq!(MarkdownParser::parse_html_element_dimension("px"), None);
1627 assert_eq!(MarkdownParser::parse_html_element_dimension("%"), None);
1628 assert_eq!(MarkdownParser::parse_html_element_dimension(""), None);
1629 assert_eq!(MarkdownParser::parse_html_element_dimension("abc%"), None);
1630 assert_eq!(MarkdownParser::parse_html_element_dimension("abcpx"), None);
1631
1632 // Test decimal values
1633 assert_eq!(
1634 MarkdownParser::parse_html_element_dimension("50.5%"),
1635 Some(DefiniteLength::Fraction(0.505))
1636 );
1637 assert_eq!(
1638 MarkdownParser::parse_html_element_dimension("100.25px"),
1639 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.25))))
1640 );
1641 assert_eq!(
1642 MarkdownParser::parse_html_element_dimension("42.0"),
1643 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(42.0))))
1644 );
1645 }
1646
1647 #[gpui::test]
1648 async fn test_html_unordered_list() {
1649 let parsed = parse(
1650 "<ul>
1651 <li>Item 1</li>
1652 <li>Item 2</li>
1653 </ul>",
1654 )
1655 .await;
1656
1657 assert_eq!(
1658 ParsedMarkdown {
1659 children: vec![
1660 nested_list_item(
1661 0..82,
1662 1,
1663 ParsedMarkdownListItemType::Unordered,
1664 vec![ParsedMarkdownElement::Paragraph(text("Item 1", 0..82))]
1665 ),
1666 nested_list_item(
1667 0..82,
1668 1,
1669 ParsedMarkdownListItemType::Unordered,
1670 vec![ParsedMarkdownElement::Paragraph(text("Item 2", 0..82))]
1671 ),
1672 ]
1673 },
1674 parsed
1675 );
1676 }
1677
1678 #[gpui::test]
1679 async fn test_html_ordered_list() {
1680 let parsed = parse(
1681 "<ol>
1682 <li>Item 1</li>
1683 <li>Item 2</li>
1684 </ol>",
1685 )
1686 .await;
1687
1688 assert_eq!(
1689 ParsedMarkdown {
1690 children: vec![
1691 nested_list_item(
1692 0..82,
1693 1,
1694 ParsedMarkdownListItemType::Ordered(1),
1695 vec![ParsedMarkdownElement::Paragraph(text("Item 1", 0..82))]
1696 ),
1697 nested_list_item(
1698 0..82,
1699 1,
1700 ParsedMarkdownListItemType::Ordered(2),
1701 vec![ParsedMarkdownElement::Paragraph(text("Item 2", 0..82))]
1702 ),
1703 ]
1704 },
1705 parsed
1706 );
1707 }
1708
1709 #[gpui::test]
1710 async fn test_html_nested_ordered_list() {
1711 let parsed = parse(
1712 "<ol>
1713 <li>Item 1</li>
1714 <li>Item 2
1715 <ol>
1716 <li>Sub-Item 1</li>
1717 <li>Sub-Item 2</li>
1718 </ol>
1719 </li>
1720 </ol>",
1721 )
1722 .await;
1723
1724 assert_eq!(
1725 ParsedMarkdown {
1726 children: vec![
1727 nested_list_item(
1728 0..216,
1729 1,
1730 ParsedMarkdownListItemType::Ordered(1),
1731 vec![ParsedMarkdownElement::Paragraph(text("Item 1", 0..216))]
1732 ),
1733 nested_list_item(
1734 0..216,
1735 1,
1736 ParsedMarkdownListItemType::Ordered(2),
1737 vec![
1738 ParsedMarkdownElement::Paragraph(text("Item 2", 0..216)),
1739 nested_list_item(
1740 0..216,
1741 2,
1742 ParsedMarkdownListItemType::Ordered(1),
1743 vec![ParsedMarkdownElement::Paragraph(text("Sub-Item 1", 0..216))]
1744 ),
1745 nested_list_item(
1746 0..216,
1747 2,
1748 ParsedMarkdownListItemType::Ordered(2),
1749 vec![ParsedMarkdownElement::Paragraph(text("Sub-Item 2", 0..216))]
1750 ),
1751 ]
1752 ),
1753 ]
1754 },
1755 parsed
1756 );
1757 }
1758
1759 #[gpui::test]
1760 async fn test_html_nested_unordered_list() {
1761 let parsed = parse(
1762 "<ul>
1763 <li>Item 1</li>
1764 <li>Item 2
1765 <ul>
1766 <li>Sub-Item 1</li>
1767 <li>Sub-Item 2</li>
1768 </ul>
1769 </li>
1770 </ul>",
1771 )
1772 .await;
1773
1774 assert_eq!(
1775 ParsedMarkdown {
1776 children: vec![
1777 nested_list_item(
1778 0..216,
1779 1,
1780 ParsedMarkdownListItemType::Unordered,
1781 vec![ParsedMarkdownElement::Paragraph(text("Item 1", 0..216))]
1782 ),
1783 nested_list_item(
1784 0..216,
1785 1,
1786 ParsedMarkdownListItemType::Unordered,
1787 vec![
1788 ParsedMarkdownElement::Paragraph(text("Item 2", 0..216)),
1789 nested_list_item(
1790 0..216,
1791 2,
1792 ParsedMarkdownListItemType::Unordered,
1793 vec![ParsedMarkdownElement::Paragraph(text("Sub-Item 1", 0..216))]
1794 ),
1795 nested_list_item(
1796 0..216,
1797 2,
1798 ParsedMarkdownListItemType::Unordered,
1799 vec![ParsedMarkdownElement::Paragraph(text("Sub-Item 2", 0..216))]
1800 ),
1801 ]
1802 ),
1803 ]
1804 },
1805 parsed
1806 );
1807 }
1808
1809 #[gpui::test]
1810 async fn test_inline_html_image_tag() {
1811 let parsed =
1812 parse("<p>Some text<img src=\"http://example.com/foo.png\" /> some more text</p>")
1813 .await;
1814
1815 assert_eq!(
1816 ParsedMarkdown {
1817 children: vec![ParsedMarkdownElement::Paragraph(vec![
1818 MarkdownParagraphChunk::Text(ParsedMarkdownText {
1819 source_range: 0..71,
1820 contents: "Some text".into(),
1821 highlights: Default::default(),
1822 region_ranges: Default::default(),
1823 regions: Default::default()
1824 }),
1825 MarkdownParagraphChunk::Image(Image {
1826 source_range: 0..71,
1827 link: Link::Web {
1828 url: "http://example.com/foo.png".to_string(),
1829 },
1830 alt_text: None,
1831 height: None,
1832 width: None,
1833 }),
1834 MarkdownParagraphChunk::Text(ParsedMarkdownText {
1835 source_range: 0..71,
1836 contents: " some more text".into(),
1837 highlights: Default::default(),
1838 region_ranges: Default::default(),
1839 regions: Default::default()
1840 }),
1841 ])]
1842 },
1843 parsed
1844 );
1845 }
1846
1847 #[gpui::test]
1848 async fn test_html_block_quote() {
1849 let parsed = parse(
1850 "<blockquote>
1851 <p>some description</p>
1852 </blockquote>",
1853 )
1854 .await;
1855
1856 assert_eq!(
1857 ParsedMarkdown {
1858 children: vec![block_quote(
1859 vec![ParsedMarkdownElement::Paragraph(text(
1860 "some description",
1861 0..78
1862 ))],
1863 0..78,
1864 )]
1865 },
1866 parsed
1867 );
1868 }
1869
1870 #[gpui::test]
1871 async fn test_html_nested_block_quote() {
1872 let parsed = parse(
1873 "<blockquote>
1874 <p>some description</p>
1875 <blockquote>
1876 <p>second description</p>
1877 </blockquote>
1878 </blockquote>",
1879 )
1880 .await;
1881
1882 assert_eq!(
1883 ParsedMarkdown {
1884 children: vec![block_quote(
1885 vec![
1886 ParsedMarkdownElement::Paragraph(text("some description", 0..179)),
1887 block_quote(
1888 vec![ParsedMarkdownElement::Paragraph(text(
1889 "second description",
1890 0..179
1891 ))],
1892 0..179,
1893 )
1894 ],
1895 0..179,
1896 )]
1897 },
1898 parsed
1899 );
1900 }
1901
1902 #[gpui::test]
1903 async fn test_html_table() {
1904 let parsed = parse(
1905 "<table>
1906 <thead>
1907 <tr>
1908 <th>Id</th>
1909 <th>Name</th>
1910 </tr>
1911 </thead>
1912 <tbody>
1913 <tr>
1914 <td>1</td>
1915 <td>Chris</td>
1916 </tr>
1917 <tr>
1918 <td>2</td>
1919 <td>Dennis</td>
1920 </tr>
1921 </tbody>
1922 </table>",
1923 )
1924 .await;
1925
1926 assert_eq!(
1927 ParsedMarkdown {
1928 children: vec![ParsedMarkdownElement::Table(table(
1929 0..366,
1930 None,
1931 vec![row(vec![
1932 column(
1933 1,
1934 1,
1935 true,
1936 text("Id", 0..366),
1937 ParsedMarkdownTableAlignment::Center
1938 ),
1939 column(
1940 1,
1941 1,
1942 true,
1943 text("Name ", 0..366),
1944 ParsedMarkdownTableAlignment::Center
1945 )
1946 ])],
1947 vec![
1948 row(vec![
1949 column(
1950 1,
1951 1,
1952 false,
1953 text("1", 0..366),
1954 ParsedMarkdownTableAlignment::None
1955 ),
1956 column(
1957 1,
1958 1,
1959 false,
1960 text("Chris", 0..366),
1961 ParsedMarkdownTableAlignment::None
1962 )
1963 ]),
1964 row(vec![
1965 column(
1966 1,
1967 1,
1968 false,
1969 text("2", 0..366),
1970 ParsedMarkdownTableAlignment::None
1971 ),
1972 column(
1973 1,
1974 1,
1975 false,
1976 text("Dennis", 0..366),
1977 ParsedMarkdownTableAlignment::None
1978 )
1979 ]),
1980 ],
1981 ))],
1982 },
1983 parsed
1984 );
1985 }
1986
1987 #[gpui::test]
1988 async fn test_html_table_with_caption() {
1989 let parsed = parse(
1990 "<table>
1991 <caption>My Table</caption>
1992 <tbody>
1993 <tr>
1994 <td>1</td>
1995 <td>Chris</td>
1996 </tr>
1997 <tr>
1998 <td>2</td>
1999 <td>Dennis</td>
2000 </tr>
2001 </tbody>
2002 </table>",
2003 )
2004 .await;
2005
2006 assert_eq!(
2007 ParsedMarkdown {
2008 children: vec![ParsedMarkdownElement::Table(table(
2009 0..280,
2010 Some(vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2011 source_range: 0..280,
2012 contents: "My Table".into(),
2013 highlights: Default::default(),
2014 region_ranges: Default::default(),
2015 regions: Default::default()
2016 })]),
2017 vec![],
2018 vec![
2019 row(vec![
2020 column(
2021 1,
2022 1,
2023 false,
2024 text("1", 0..280),
2025 ParsedMarkdownTableAlignment::None
2026 ),
2027 column(
2028 1,
2029 1,
2030 false,
2031 text("Chris", 0..280),
2032 ParsedMarkdownTableAlignment::None
2033 )
2034 ]),
2035 row(vec![
2036 column(
2037 1,
2038 1,
2039 false,
2040 text("2", 0..280),
2041 ParsedMarkdownTableAlignment::None
2042 ),
2043 column(
2044 1,
2045 1,
2046 false,
2047 text("Dennis", 0..280),
2048 ParsedMarkdownTableAlignment::None
2049 )
2050 ]),
2051 ],
2052 ))],
2053 },
2054 parsed
2055 );
2056 }
2057
2058 #[gpui::test]
2059 async fn test_html_table_without_headings() {
2060 let parsed = parse(
2061 "<table>
2062 <tbody>
2063 <tr>
2064 <td>1</td>
2065 <td>Chris</td>
2066 </tr>
2067 <tr>
2068 <td>2</td>
2069 <td>Dennis</td>
2070 </tr>
2071 </tbody>
2072 </table>",
2073 )
2074 .await;
2075
2076 assert_eq!(
2077 ParsedMarkdown {
2078 children: vec![ParsedMarkdownElement::Table(table(
2079 0..240,
2080 None,
2081 vec![],
2082 vec![
2083 row(vec![
2084 column(
2085 1,
2086 1,
2087 false,
2088 text("1", 0..240),
2089 ParsedMarkdownTableAlignment::None
2090 ),
2091 column(
2092 1,
2093 1,
2094 false,
2095 text("Chris", 0..240),
2096 ParsedMarkdownTableAlignment::None
2097 )
2098 ]),
2099 row(vec![
2100 column(
2101 1,
2102 1,
2103 false,
2104 text("2", 0..240),
2105 ParsedMarkdownTableAlignment::None
2106 ),
2107 column(
2108 1,
2109 1,
2110 false,
2111 text("Dennis", 0..240),
2112 ParsedMarkdownTableAlignment::None
2113 )
2114 ]),
2115 ],
2116 ))],
2117 },
2118 parsed
2119 );
2120 }
2121
2122 #[gpui::test]
2123 async fn test_html_table_without_body() {
2124 let parsed = parse(
2125 "<table>
2126 <thead>
2127 <tr>
2128 <th>Id</th>
2129 <th>Name</th>
2130 </tr>
2131 </thead>
2132 </table>",
2133 )
2134 .await;
2135
2136 assert_eq!(
2137 ParsedMarkdown {
2138 children: vec![ParsedMarkdownElement::Table(table(
2139 0..150,
2140 None,
2141 vec![row(vec![
2142 column(
2143 1,
2144 1,
2145 true,
2146 text("Id", 0..150),
2147 ParsedMarkdownTableAlignment::Center
2148 ),
2149 column(
2150 1,
2151 1,
2152 true,
2153 text("Name", 0..150),
2154 ParsedMarkdownTableAlignment::Center
2155 )
2156 ])],
2157 vec![],
2158 ))],
2159 },
2160 parsed
2161 );
2162 }
2163
2164 #[gpui::test]
2165 async fn test_html_heading_tags() {
2166 let parsed = parse("<h1>Heading</h1><h2>Heading</h2><h3>Heading</h3><h4>Heading</h4><h5>Heading</h5><h6>Heading</h6>").await;
2167
2168 assert_eq!(
2169 ParsedMarkdown {
2170 children: vec![
2171 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2172 level: HeadingLevel::H1,
2173 source_range: 0..96,
2174 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2175 source_range: 0..96,
2176 contents: "Heading".into(),
2177 highlights: Vec::default(),
2178 region_ranges: Vec::default(),
2179 regions: Vec::default()
2180 })],
2181 }),
2182 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2183 level: HeadingLevel::H2,
2184 source_range: 0..96,
2185 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2186 source_range: 0..96,
2187 contents: "Heading".into(),
2188 highlights: Vec::default(),
2189 region_ranges: Vec::default(),
2190 regions: Vec::default()
2191 })],
2192 }),
2193 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2194 level: HeadingLevel::H3,
2195 source_range: 0..96,
2196 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2197 source_range: 0..96,
2198 contents: "Heading".into(),
2199 highlights: Vec::default(),
2200 region_ranges: Vec::default(),
2201 regions: Vec::default()
2202 })],
2203 }),
2204 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2205 level: HeadingLevel::H4,
2206 source_range: 0..96,
2207 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2208 source_range: 0..96,
2209 contents: "Heading".into(),
2210 highlights: Vec::default(),
2211 region_ranges: Vec::default(),
2212 regions: Vec::default()
2213 })],
2214 }),
2215 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2216 level: HeadingLevel::H5,
2217 source_range: 0..96,
2218 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2219 source_range: 0..96,
2220 contents: "Heading".into(),
2221 highlights: Vec::default(),
2222 region_ranges: Vec::default(),
2223 regions: Vec::default()
2224 })],
2225 }),
2226 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2227 level: HeadingLevel::H6,
2228 source_range: 0..96,
2229 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2230 source_range: 0..96,
2231 contents: "Heading".into(),
2232 highlights: Vec::default(),
2233 region_ranges: Vec::default(),
2234 regions: Vec::default()
2235 })],
2236 }),
2237 ],
2238 },
2239 parsed
2240 );
2241 }
2242
2243 #[gpui::test]
2244 async fn test_html_image_tag() {
2245 let parsed = parse("<img src=\"http://example.com/foo.png\" />").await;
2246
2247 assert_eq!(
2248 ParsedMarkdown {
2249 children: vec![ParsedMarkdownElement::Image(Image {
2250 source_range: 0..40,
2251 link: Link::Web {
2252 url: "http://example.com/foo.png".to_string(),
2253 },
2254 alt_text: None,
2255 height: None,
2256 width: None,
2257 })]
2258 },
2259 parsed
2260 );
2261 }
2262
2263 #[gpui::test]
2264 async fn test_html_image_tag_with_alt_text() {
2265 let parsed = parse("<img src=\"http://example.com/foo.png\" alt=\"Foo\" />").await;
2266
2267 assert_eq!(
2268 ParsedMarkdown {
2269 children: vec![ParsedMarkdownElement::Image(Image {
2270 source_range: 0..50,
2271 link: Link::Web {
2272 url: "http://example.com/foo.png".to_string(),
2273 },
2274 alt_text: Some("Foo".into()),
2275 height: None,
2276 width: None,
2277 })]
2278 },
2279 parsed
2280 );
2281 }
2282
2283 #[gpui::test]
2284 async fn test_html_image_tag_with_height_and_width() {
2285 let parsed =
2286 parse("<img src=\"http://example.com/foo.png\" height=\"100\" width=\"200\" />").await;
2287
2288 assert_eq!(
2289 ParsedMarkdown {
2290 children: vec![ParsedMarkdownElement::Image(Image {
2291 source_range: 0..65,
2292 link: Link::Web {
2293 url: "http://example.com/foo.png".to_string(),
2294 },
2295 alt_text: None,
2296 height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
2297 width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
2298 })]
2299 },
2300 parsed
2301 );
2302 }
2303
2304 #[gpui::test]
2305 async fn test_html_image_style_tag_with_height_and_width() {
2306 let parsed = parse(
2307 "<img src=\"http://example.com/foo.png\" style=\"height:100px; width:200px;\" />",
2308 )
2309 .await;
2310
2311 assert_eq!(
2312 ParsedMarkdown {
2313 children: vec![ParsedMarkdownElement::Image(Image {
2314 source_range: 0..75,
2315 link: Link::Web {
2316 url: "http://example.com/foo.png".to_string(),
2317 },
2318 alt_text: None,
2319 height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
2320 width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
2321 })]
2322 },
2323 parsed
2324 );
2325 }
2326
2327 #[gpui::test]
2328 async fn test_header_only_table() {
2329 let markdown = "\
2330| Header 1 | Header 2 |
2331|----------|----------|
2332
2333Some other content
2334";
2335
2336 let expected_table = table(
2337 0..48,
2338 None,
2339 vec![row(vec![
2340 column(
2341 1,
2342 1,
2343 true,
2344 text("Header 1", 1..11),
2345 ParsedMarkdownTableAlignment::None,
2346 ),
2347 column(
2348 1,
2349 1,
2350 true,
2351 text("Header 2", 12..22),
2352 ParsedMarkdownTableAlignment::None,
2353 ),
2354 ])],
2355 vec![],
2356 );
2357
2358 assert_eq!(
2359 parse(markdown).await.children[0],
2360 ParsedMarkdownElement::Table(expected_table)
2361 );
2362 }
2363
2364 #[gpui::test]
2365 async fn test_basic_table() {
2366 let markdown = "\
2367| Header 1 | Header 2 |
2368|----------|----------|
2369| Cell 1 | Cell 2 |
2370| Cell 3 | Cell 4 |";
2371
2372 let expected_table = table(
2373 0..95,
2374 None,
2375 vec![row(vec![
2376 column(
2377 1,
2378 1,
2379 true,
2380 text("Header 1", 1..11),
2381 ParsedMarkdownTableAlignment::None,
2382 ),
2383 column(
2384 1,
2385 1,
2386 true,
2387 text("Header 2", 12..22),
2388 ParsedMarkdownTableAlignment::None,
2389 ),
2390 ])],
2391 vec![
2392 row(vec![
2393 column(
2394 1,
2395 1,
2396 false,
2397 text("Cell 1", 49..59),
2398 ParsedMarkdownTableAlignment::None,
2399 ),
2400 column(
2401 1,
2402 1,
2403 false,
2404 text("Cell 2", 60..70),
2405 ParsedMarkdownTableAlignment::None,
2406 ),
2407 ]),
2408 row(vec![
2409 column(
2410 1,
2411 1,
2412 false,
2413 text("Cell 3", 73..83),
2414 ParsedMarkdownTableAlignment::None,
2415 ),
2416 column(
2417 1,
2418 1,
2419 false,
2420 text("Cell 4", 84..94),
2421 ParsedMarkdownTableAlignment::None,
2422 ),
2423 ]),
2424 ],
2425 );
2426
2427 assert_eq!(
2428 parse(markdown).await.children[0],
2429 ParsedMarkdownElement::Table(expected_table)
2430 );
2431 }
2432
2433 #[gpui::test]
2434 async fn test_list_basic() {
2435 let parsed = parse(
2436 "\
2437* Item 1
2438* Item 2
2439* Item 3
2440",
2441 )
2442 .await;
2443
2444 assert_eq!(
2445 parsed.children,
2446 vec![
2447 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
2448 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
2449 list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
2450 ],
2451 );
2452 }
2453
2454 #[gpui::test]
2455 async fn test_list_with_tasks() {
2456 let parsed = parse(
2457 "\
2458- [ ] TODO
2459- [x] Checked
2460",
2461 )
2462 .await;
2463
2464 assert_eq!(
2465 parsed.children,
2466 vec![
2467 list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
2468 list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
2469 ],
2470 );
2471 }
2472
2473 #[gpui::test]
2474 async fn test_list_with_indented_task() {
2475 let parsed = parse(
2476 "\
2477- [ ] TODO
2478 - [x] Checked
2479 - Unordered
2480 1. Number 1
2481 1. Number 2
24821. Number A
2483",
2484 )
2485 .await;
2486
2487 assert_eq!(
2488 parsed.children,
2489 vec![
2490 list_item(0..12, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
2491 list_item(13..26, 2, Task(true, 15..18), vec![p("Checked", 19..26)]),
2492 list_item(29..40, 2, Unordered, vec![p("Unordered", 31..40)]),
2493 list_item(43..54, 2, Ordered(1), vec![p("Number 1", 46..54)]),
2494 list_item(57..68, 2, Ordered(2), vec![p("Number 2", 60..68)]),
2495 list_item(69..80, 1, Ordered(1), vec![p("Number A", 72..80)]),
2496 ],
2497 );
2498 }
2499
2500 #[gpui::test]
2501 async fn test_list_with_linebreak_is_handled_correctly() {
2502 let parsed = parse(
2503 "\
2504- [ ] Task 1
2505
2506- [x] Task 2
2507",
2508 )
2509 .await;
2510
2511 assert_eq!(
2512 parsed.children,
2513 vec![
2514 list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
2515 list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
2516 ],
2517 );
2518 }
2519
// Parses bulleted and numbered lists nested several levels deep, including an
// empty `-` item, and checks the list items that come back.
//
// NOTE(review): every byte range asserted below depends on the exact leading
// whitespace of the nested fixture lines — confirm it is intact before
// adjusting any offset.
#[gpui::test]
async fn test_list_nested() {
    let parsed = parse(
        "\
* Item 1
* Item 2
* Item 3

1. Hello
1. Two
 1. Three
2. Four
3. Five

* First
 1. Hello
 1. Goodbyte
 - Inner
 - Inner
 2. Goodbyte
 - Next item empty
 -
* Last
",
    )
    .await;

    // The expectation is one flat sequence of items; nesting is expressed only
    // through the second argument of `list_item` (the depth).
    assert_eq!(
        parsed.children,
        vec![
            // First bulleted list.
            list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
            list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
            list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
            // Numbered list: the expected ordinals at depth 1 run 1, 2, 3, 4
            // even though the source markers read `1.`, `1.`, `2.`, `3.`.
            list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
            list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
            // The nested numbered item starts again at 1, one level deeper.
            list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
            list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
            list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
            // Mixed list: bullets and numbers alternating down to depth 4.
            list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
            list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
            list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
            list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
            list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
            list_item(143..159, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
            list_item(160..180, 3, Unordered, vec![p("Next item empty", 165..180)]),
            // The bare `-` marker is expected as an item with no content.
            list_item(186..190, 3, Unordered, vec![]),
            list_item(191..197, 1, Unordered, vec![p("Last", 193..197)]),
        ]
    );
}
2570
// Parses a single bullet whose body holds two paragraphs separated by a blank
// line, and expects ONE list item containing both paragraphs as children.
//
// NOTE(review): the first paragraph is expected at `4..44`, i.e. its text
// starts at byte 4 of the fixture — confirm the spacing after `*` and the
// indentation of the second paragraph are intact before touching offsets.
#[gpui::test]
async fn test_list_with_nested_content() {
    let parsed = parse(
        "\
* This is a list item with two paragraphs.

 This is the second paragraph in the list item.
",
    )
    .await;

    assert_eq!(
        parsed.children,
        vec![list_item(
            // Range of the whole item; note the second paragraph's expected
            // range below ends one byte later (97) than the item's (96).
            0..96,
            1,
            Unordered,
            vec![
                p("This is a list item with two paragraphs.", 4..44),
                p("This is the second paragraph in the list item.", 50..97)
            ],
        ),],
    );
}
2595
// Parses a bullet whose text contains an inline HTML element. The expected
// paragraph text drops the `<sometag>`/`</sometag>` markup but keeps the
// inner text ("tag") and the trailing period.
//
// NOTE(review): the paragraph is expected to start at byte 4 — confirm the
// spacing after `*` in the fixture is intact before touching offsets.
#[gpui::test]
async fn test_list_item_with_inline_html() {
    let parsed = parse(
        "\
* This is a list item with an inline HTML <sometag>tag</sometag>.
",
    )
    .await;

    assert_eq!(
        parsed.children,
        vec![list_item(
            0..67,
            1,
            Unordered,
            // The expected range (4..44) is narrower than the item's range
            // (0..67) even though the expected text includes "tag.".
            vec![p("This is a list item with an inline HTML tag.", 4..44),],
        ),],
    );
}
2615
// Parses numbered lists nested three levels deep with a free-standing
// paragraph ("text") and a further numbered item ("d") following them.
//
// NOTE(review): all byte ranges below depend on the exact indentation of the
// fixture lines — confirm it is intact before adjusting any offset.
#[gpui::test]
async fn test_nested_list_with_paragraph_inside() {
    let parsed = parse(
        "\
1. a
 1. b
 1. c

 text

 1. d
",
    )
    .await;

    assert_eq!(
        parsed.children,
        vec![
            list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
            list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
            list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
            // "text" is expected as its own element in the flat output, not as
            // content of any of the surrounding list items.
            p("text", 32..37),
            // "d" is expected at depth 2 and numbered 1.
            list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
        ],
    );
}
2642
2643 #[gpui::test]
2644 async fn test_list_with_leading_text() {
2645 let parsed = parse(
2646 "\
2647* `code`
2648* **bold**
2649* [link](https://example.com)
2650",
2651 )
2652 .await;
2653
2654 assert_eq!(
2655 parsed.children,
2656 vec![
2657 list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
2658 list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
2659 list_item(20..49, 1, Unordered, vec![p("link", 22..49)],),
2660 ],
2661 );
2662 }
2663
2664 #[gpui::test]
2665 async fn test_simple_block_quote() {
2666 let parsed = parse("> Simple block quote with **styled text**").await;
2667
2668 assert_eq!(
2669 parsed.children,
2670 vec![block_quote(
2671 vec![p("Simple block quote with styled text", 2..41)],
2672 0..41
2673 )]
2674 );
2675 }
2676
2677 #[gpui::test]
2678 async fn test_simple_block_quote_with_multiple_lines() {
2679 let parsed = parse(
2680 "\
2681> # Heading
2682> More
2683> text
2684>
2685> More text
2686",
2687 )
2688 .await;
2689
2690 assert_eq!(
2691 parsed.children,
2692 vec![block_quote(
2693 vec![
2694 h1(text("Heading", 4..11), 2..12),
2695 p("More text", 14..26),
2696 p("More text", 30..40)
2697 ],
2698 0..40
2699 )]
2700 );
2701 }
2702
2703 #[gpui::test]
2704 async fn test_nested_block_quote() {
2705 let parsed = parse(
2706 "\
2707> A
2708>
2709> > # B
2710>
2711> C
2712
2713More text
2714",
2715 )
2716 .await;
2717
2718 assert_eq!(
2719 parsed.children,
2720 vec![
2721 block_quote(
2722 vec![
2723 p("A", 2..4),
2724 block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
2725 p("C", 18..20)
2726 ],
2727 0..20
2728 ),
2729 p("More text", 21..31)
2730 ]
2731 );
2732 }
2733
// Parses a fenced code block without an info string and expects a single
// `code_block` element with no language and no highlights.
//
// NOTE(review): the expected range `0..35` and the expected contents are tied
// to the exact whitespace inside the fence — confirm the indentation of the
// `return 0;` line in both the fixture and the expected string before
// changing either.
#[gpui::test]
async fn test_code_block() {
    let parsed = parse(
        "\
```
fn main() {
 return 0;
}
```
",
    )
    .await;

    assert_eq!(
        parsed.children,
        vec![code_block(
            // No info string after the opening fence, so no language.
            None,
            // Expected contents exclude the fence lines and the final newline.
            "fn main() {\n return 0;\n}",
            0..35,
            // No highlights are expected.
            None
        )]
    );
}
2757
// Parses a fenced code block tagged `rust` while a language registry is
// supplied, and expects the block to report its language and a (present but
// empty) highlight list.
//
// NOTE(review): the expected range `0..39` and the expected contents are tied
// to the exact whitespace inside the fence — confirm the indentation of the
// `return 0;` line in both the fixture and the expected string before
// changing either.
#[gpui::test]
async fn test_code_block_with_language(executor: BackgroundExecutor) {
    // Test registry that has only the language built by `rust_lang()` added.
    let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
    language_registry.add(rust_lang());

    // Calls `parse_markdown` directly so the registry can be passed as the
    // third argument; the file-location directory is `None`.
    let parsed = parse_markdown(
        "\
```rust
fn main() {
 return 0;
}
```
",
        None,
        Some(language_registry),
    )
    .await;

    assert_eq!(
        parsed.children,
        vec![code_block(
            Some("rust".to_string()),
            "fn main() {\n return 0;\n}",
            0..39,
            // Highlights are expected to be `Some` but empty — presumably
            // because `rust_lang()` configures no highlight query; confirm.
            Some(vec![])
        )]
    );
}
2786
2787 fn rust_lang() -> Arc<Language> {
2788 Arc::new(Language::new(
2789 LanguageConfig {
2790 name: "Rust".into(),
2791 matcher: LanguageMatcher {
2792 path_suffixes: vec!["rs".into()],
2793 ..Default::default()
2794 },
2795 collapsed_placeholder: " /* ... */ ".to_string(),
2796 ..Default::default()
2797 },
2798 Some(tree_sitter_rust::LANGUAGE.into()),
2799 ))
2800 }
2801
2802 fn h1(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
2803 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2804 source_range,
2805 level: HeadingLevel::H1,
2806 contents,
2807 })
2808 }
2809
2810 fn h2(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
2811 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2812 source_range,
2813 level: HeadingLevel::H2,
2814 contents,
2815 })
2816 }
2817
2818 fn h3(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
2819 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2820 source_range,
2821 level: HeadingLevel::H3,
2822 contents,
2823 })
2824 }
2825
2826 fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
2827 ParsedMarkdownElement::Paragraph(text(contents, source_range))
2828 }
2829
2830 fn text(contents: &str, source_range: Range<usize>) -> MarkdownParagraph {
2831 vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2832 highlights: Vec::new(),
2833 region_ranges: Vec::new(),
2834 regions: Vec::new(),
2835 source_range,
2836 contents: contents.to_string().into(),
2837 })]
2838 }
2839
2840 fn block_quote(
2841 children: Vec<ParsedMarkdownElement>,
2842 source_range: Range<usize>,
2843 ) -> ParsedMarkdownElement {
2844 ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
2845 source_range,
2846 children,
2847 })
2848 }
2849
2850 fn code_block(
2851 language: Option<String>,
2852 code: &str,
2853 source_range: Range<usize>,
2854 highlights: Option<Vec<(Range<usize>, HighlightId)>>,
2855 ) -> ParsedMarkdownElement {
2856 ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
2857 source_range,
2858 language,
2859 contents: code.to_string().into(),
2860 highlights,
2861 })
2862 }
2863
2864 fn list_item(
2865 source_range: Range<usize>,
2866 depth: u16,
2867 item_type: ParsedMarkdownListItemType,
2868 content: Vec<ParsedMarkdownElement>,
2869 ) -> ParsedMarkdownElement {
2870 ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
2871 source_range,
2872 item_type,
2873 depth,
2874 content,
2875 nested: false,
2876 })
2877 }
2878
2879 fn nested_list_item(
2880 source_range: Range<usize>,
2881 depth: u16,
2882 item_type: ParsedMarkdownListItemType,
2883 content: Vec<ParsedMarkdownElement>,
2884 ) -> ParsedMarkdownElement {
2885 ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
2886 source_range,
2887 item_type,
2888 depth,
2889 content,
2890 nested: true,
2891 })
2892 }
2893
2894 fn table(
2895 source_range: Range<usize>,
2896 caption: Option<MarkdownParagraph>,
2897 header: Vec<ParsedMarkdownTableRow>,
2898 body: Vec<ParsedMarkdownTableRow>,
2899 ) -> ParsedMarkdownTable {
2900 ParsedMarkdownTable {
2901 source_range,
2902 header,
2903 body,
2904 caption,
2905 }
2906 }
2907
2908 fn row(columns: Vec<ParsedMarkdownTableColumn>) -> ParsedMarkdownTableRow {
2909 ParsedMarkdownTableRow { columns }
2910 }
2911
2912 fn column(
2913 col_span: usize,
2914 row_span: usize,
2915 is_header: bool,
2916 children: MarkdownParagraph,
2917 alignment: ParsedMarkdownTableAlignment,
2918 ) -> ParsedMarkdownTableColumn {
2919 ParsedMarkdownTableColumn {
2920 col_span,
2921 row_span,
2922 is_header,
2923 children,
2924 alignment,
2925 }
2926 }
2927
2928 impl PartialEq for ParsedMarkdownTable {
2929 fn eq(&self, other: &Self) -> bool {
2930 self.source_range == other.source_range
2931 && self.header == other.header
2932 && self.body == other.body
2933 }
2934 }
2935
2936 impl PartialEq for ParsedMarkdownText {
2937 fn eq(&self, other: &Self) -> bool {
2938 self.source_range == other.source_range && self.contents == other.contents
2939 }
2940 }
2941}