1use crate::{
2 markdown_elements::*,
3 markdown_minifier::{Minifier, MinifierOptions},
4};
5use async_recursion::async_recursion;
6use collections::FxHashMap;
7use gpui::{DefiniteLength, FontWeight, px, relative};
8use html5ever::{ParseOpts, local_name, parse_document, tendril::TendrilSink};
9use language::LanguageRegistry;
10use markup5ever_rcdom::RcDom;
11use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
12use std::{
13 cell::RefCell, collections::HashMap, mem, ops::Range, path::PathBuf, rc::Rc, sync::Arc, vec,
14};
15
16pub async fn parse_markdown(
17 markdown_input: &str,
18 file_location_directory: Option<PathBuf>,
19 language_registry: Option<Arc<LanguageRegistry>>,
20) -> ParsedMarkdown {
21 let mut options = Options::all();
22 options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
23
24 let parser = Parser::new_ext(markdown_input, options);
25 let parser = MarkdownParser::new(
26 parser.into_offset_iter().collect(),
27 file_location_directory,
28 language_registry,
29 );
30 let renderer = parser.parse_document().await;
31 ParsedMarkdown {
32 children: renderer.parsed,
33 }
34}
35
36fn cleanup_html(source: &str) -> Vec<u8> {
37 let mut writer = std::io::Cursor::new(Vec::new());
38 let mut reader = std::io::Cursor::new(source);
39 let mut minify = Minifier::new(
40 &mut writer,
41 MinifierOptions {
42 omit_doctype: true,
43 collapse_whitespace: true,
44 ..Default::default()
45 },
46 );
47 if let Ok(()) = minify.minify(&mut reader) {
48 writer.into_inner()
49 } else {
50 source.bytes().collect()
51 }
52}
53
54struct MarkdownParser<'a> {
55 tokens: Vec<(Event<'a>, Range<usize>)>,
56 /// The current index in the tokens array
57 cursor: usize,
58 /// The blocks that we have successfully parsed so far
59 parsed: Vec<ParsedMarkdownElement>,
60 file_location_directory: Option<PathBuf>,
61 language_registry: Option<Arc<LanguageRegistry>>,
62}
63
/// State threaded through the recursive HTML node walk.
#[derive(Debug)]
struct ParseHtmlNodeContext {
    /// Depth assigned to list items produced by the next `<ul>`/`<ol>`
    /// encountered; incremented for the children of each `<li>`.
    list_item_depth: u16,
}
68
69impl Default for ParseHtmlNodeContext {
70 fn default() -> Self {
71 Self { list_item_depth: 1 }
72 }
73}
74
75struct MarkdownListItem {
76 content: Vec<ParsedMarkdownElement>,
77 item_type: ParsedMarkdownListItemType,
78}
79
80impl Default for MarkdownListItem {
81 fn default() -> Self {
82 Self {
83 content: Vec::new(),
84 item_type: ParsedMarkdownListItemType::Unordered,
85 }
86 }
87}
88
89impl<'a> MarkdownParser<'a> {
90 fn new(
91 tokens: Vec<(Event<'a>, Range<usize>)>,
92 file_location_directory: Option<PathBuf>,
93 language_registry: Option<Arc<LanguageRegistry>>,
94 ) -> Self {
95 Self {
96 tokens,
97 file_location_directory,
98 language_registry,
99 cursor: 0,
100 parsed: vec![],
101 }
102 }
103
104 fn eof(&self) -> bool {
105 if self.tokens.is_empty() {
106 return true;
107 }
108 self.cursor >= self.tokens.len() - 1
109 }
110
111 fn peek(&self, steps: usize) -> Option<&(Event<'_>, Range<usize>)> {
112 if self.eof() || (steps + self.cursor) >= self.tokens.len() {
113 return self.tokens.last();
114 }
115 self.tokens.get(self.cursor + steps)
116 }
117
118 fn previous(&self) -> Option<&(Event<'_>, Range<usize>)> {
119 if self.cursor == 0 || self.cursor > self.tokens.len() {
120 return None;
121 }
122 self.tokens.get(self.cursor - 1)
123 }
124
125 fn current(&self) -> Option<&(Event<'_>, Range<usize>)> {
126 self.peek(0)
127 }
128
129 fn current_event(&self) -> Option<&Event<'_>> {
130 self.current().map(|(event, _)| event)
131 }
132
133 fn is_text_like(event: &Event) -> bool {
134 match event {
135 Event::Text(_)
136 // Represent an inline code block
137 | Event::Code(_)
138 | Event::Html(_)
139 | Event::InlineHtml(_)
140 | Event::FootnoteReference(_)
141 | Event::Start(Tag::Link { .. })
142 | Event::Start(Tag::Emphasis)
143 | Event::Start(Tag::Strong)
144 | Event::Start(Tag::Strikethrough)
145 | Event::Start(Tag::Image { .. }) => {
146 true
147 }
148 _ => false,
149 }
150 }
151
152 async fn parse_document(mut self) -> Self {
153 while !self.eof() {
154 if let Some(block) = self.parse_block().await {
155 self.parsed.extend(block);
156 } else {
157 self.cursor += 1;
158 }
159 }
160 self
161 }
162
163 #[async_recursion]
164 async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
165 let (current, source_range) = self.current().unwrap();
166 let source_range = source_range.clone();
167 match current {
168 Event::Start(tag) => match tag {
169 Tag::Paragraph => {
170 self.cursor += 1;
171 let text = self.parse_text(false, Some(source_range));
172 Some(vec![ParsedMarkdownElement::Paragraph(text)])
173 }
174 Tag::Heading { level, .. } => {
175 let level = *level;
176 self.cursor += 1;
177 let heading = self.parse_heading(level);
178 Some(vec![ParsedMarkdownElement::Heading(heading)])
179 }
180 Tag::Table(alignment) => {
181 let alignment = alignment.clone();
182 self.cursor += 1;
183 let table = self.parse_table(alignment);
184 Some(vec![ParsedMarkdownElement::Table(table)])
185 }
186 Tag::List(order) => {
187 let order = *order;
188 self.cursor += 1;
189 let list = self.parse_list(order).await;
190 Some(list)
191 }
192 Tag::BlockQuote(_kind) => {
193 self.cursor += 1;
194 let block_quote = self.parse_block_quote().await;
195 Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
196 }
197 Tag::CodeBlock(kind) => {
198 let language = match kind {
199 pulldown_cmark::CodeBlockKind::Indented => None,
200 pulldown_cmark::CodeBlockKind::Fenced(language) => {
201 if language.is_empty() {
202 None
203 } else {
204 Some(language.to_string())
205 }
206 }
207 };
208
209 self.cursor += 1;
210
211 let code_block = self.parse_code_block(language).await?;
212 Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
213 }
214 Tag::HtmlBlock => {
215 self.cursor += 1;
216
217 Some(self.parse_html_block().await)
218 }
219 _ => None,
220 },
221 Event::Rule => {
222 self.cursor += 1;
223 Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
224 }
225 _ => None,
226 }
227 }
228
229 fn parse_text(
230 &mut self,
231 should_complete_on_soft_break: bool,
232 source_range: Option<Range<usize>>,
233 ) -> MarkdownParagraph {
234 let source_range = source_range.unwrap_or_else(|| {
235 self.current()
236 .map(|(_, range)| range.clone())
237 .unwrap_or_default()
238 });
239
240 let mut markdown_text_like = Vec::new();
241 let mut text = String::new();
242 let mut bold_depth = 0;
243 let mut italic_depth = 0;
244 let mut strikethrough_depth = 0;
245 let mut link: Option<Link> = None;
246 let mut image: Option<Image> = None;
247 let mut region_ranges: Vec<Range<usize>> = vec![];
248 let mut regions: Vec<ParsedRegion> = vec![];
249 let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
250 let mut link_urls: Vec<String> = vec![];
251 let mut link_ranges: Vec<Range<usize>> = vec![];
252
253 loop {
254 if self.eof() {
255 break;
256 }
257
258 let (current, _) = self.current().unwrap();
259 let prev_len = text.len();
260 match current {
261 Event::SoftBreak => {
262 if should_complete_on_soft_break {
263 break;
264 }
265 text.push(' ');
266 }
267
268 Event::HardBreak => {
269 text.push('\n');
270 }
271
272 // We want to ignore any inline HTML tags in the text but keep
273 // the text between them
274 Event::InlineHtml(_) => {}
275
276 Event::Text(t) => {
277 text.push_str(t.as_ref());
278 let mut style = MarkdownHighlightStyle::default();
279
280 if bold_depth > 0 {
281 style.weight = FontWeight::BOLD;
282 }
283
284 if italic_depth > 0 {
285 style.italic = true;
286 }
287
288 if strikethrough_depth > 0 {
289 style.strikethrough = true;
290 }
291
292 let last_run_len = if let Some(link) = link.clone() {
293 region_ranges.push(prev_len..text.len());
294 regions.push(ParsedRegion {
295 code: false,
296 link: Some(link),
297 });
298 style.link = true;
299 prev_len
300 } else {
301 // Manually scan for links
302 let mut finder = linkify::LinkFinder::new();
303 finder.kinds(&[linkify::LinkKind::Url]);
304 let mut last_link_len = prev_len;
305 for link in finder.links(t) {
306 let start = prev_len + link.start();
307 let end = prev_len + link.end();
308 let range = start..end;
309 link_ranges.push(range.clone());
310 link_urls.push(link.as_str().to_string());
311
312 // If there is a style before we match a link, we have to add this to the highlighted ranges
313 if style != MarkdownHighlightStyle::default() && last_link_len < start {
314 highlights.push((
315 last_link_len..start,
316 MarkdownHighlight::Style(style.clone()),
317 ));
318 }
319
320 highlights.push((
321 range.clone(),
322 MarkdownHighlight::Style(MarkdownHighlightStyle {
323 underline: true,
324 ..style
325 }),
326 ));
327 region_ranges.push(range.clone());
328 regions.push(ParsedRegion {
329 code: false,
330 link: Some(Link::Web {
331 url: link.as_str().to_string(),
332 }),
333 });
334 last_link_len = end;
335 }
336 last_link_len
337 };
338
339 if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
340 let mut new_highlight = true;
341 if let Some((last_range, last_style)) = highlights.last_mut()
342 && last_range.end == last_run_len
343 && last_style == &MarkdownHighlight::Style(style.clone())
344 {
345 last_range.end = text.len();
346 new_highlight = false;
347 }
348 if new_highlight {
349 highlights.push((
350 last_run_len..text.len(),
351 MarkdownHighlight::Style(style.clone()),
352 ));
353 }
354 }
355 }
356 Event::Code(t) => {
357 text.push_str(t.as_ref());
358 region_ranges.push(prev_len..text.len());
359
360 if link.is_some() {
361 highlights.push((
362 prev_len..text.len(),
363 MarkdownHighlight::Style(MarkdownHighlightStyle {
364 link: true,
365 ..Default::default()
366 }),
367 ));
368 }
369 regions.push(ParsedRegion {
370 code: true,
371 link: link.clone(),
372 });
373 }
374 Event::Start(tag) => match tag {
375 Tag::Emphasis => italic_depth += 1,
376 Tag::Strong => bold_depth += 1,
377 Tag::Strikethrough => strikethrough_depth += 1,
378 Tag::Link { dest_url, .. } => {
379 link = Link::identify(
380 self.file_location_directory.clone(),
381 dest_url.to_string(),
382 );
383 }
384 Tag::Image { dest_url, .. } => {
385 if !text.is_empty() {
386 let parsed_regions = MarkdownParagraphChunk::Text(ParsedMarkdownText {
387 source_range: source_range.clone(),
388 contents: mem::take(&mut text).into(),
389 highlights: mem::take(&mut highlights),
390 region_ranges: mem::take(&mut region_ranges),
391 regions: mem::take(&mut regions),
392 });
393 markdown_text_like.push(parsed_regions);
394 }
395 image = Image::identify(
396 dest_url.to_string(),
397 source_range.clone(),
398 self.file_location_directory.clone(),
399 );
400 }
401 _ => {
402 break;
403 }
404 },
405
406 Event::End(tag) => match tag {
407 TagEnd::Emphasis => italic_depth -= 1,
408 TagEnd::Strong => bold_depth -= 1,
409 TagEnd::Strikethrough => strikethrough_depth -= 1,
410 TagEnd::Link => {
411 link = None;
412 }
413 TagEnd::Image => {
414 if let Some(mut image) = image.take() {
415 if !text.is_empty() {
416 image.set_alt_text(std::mem::take(&mut text).into());
417 mem::take(&mut highlights);
418 mem::take(&mut region_ranges);
419 mem::take(&mut regions);
420 }
421 markdown_text_like.push(MarkdownParagraphChunk::Image(image));
422 }
423 }
424 TagEnd::Paragraph => {
425 self.cursor += 1;
426 break;
427 }
428 _ => {
429 break;
430 }
431 },
432 _ => {
433 break;
434 }
435 }
436
437 self.cursor += 1;
438 }
439 if !text.is_empty() {
440 markdown_text_like.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
441 source_range,
442 contents: text.into(),
443 highlights,
444 regions,
445 region_ranges,
446 }));
447 }
448 markdown_text_like
449 }
450
451 fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
452 let (_event, source_range) = self.previous().unwrap();
453 let source_range = source_range.clone();
454 let text = self.parse_text(true, None);
455
456 // Advance past the heading end tag
457 self.cursor += 1;
458
459 ParsedMarkdownHeading {
460 source_range,
461 level: match level {
462 pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
463 pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
464 pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
465 pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
466 pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
467 pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
468 },
469 contents: text,
470 }
471 }
472
473 fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
474 let (_event, source_range) = self.previous().unwrap();
475 let source_range = source_range.clone();
476 let mut header = vec![];
477 let mut body = vec![];
478 let mut row_columns = vec![];
479 let mut in_header = true;
480 let column_alignments = alignment
481 .iter()
482 .map(Self::convert_alignment)
483 .collect::<Vec<_>>();
484
485 loop {
486 if self.eof() {
487 break;
488 }
489
490 let (current, source_range) = self.current().unwrap();
491 let source_range = source_range.clone();
492 match current {
493 Event::Start(Tag::TableHead)
494 | Event::Start(Tag::TableRow)
495 | Event::End(TagEnd::TableCell) => {
496 self.cursor += 1;
497 }
498 Event::Start(Tag::TableCell) => {
499 self.cursor += 1;
500 let cell_contents = self.parse_text(false, Some(source_range));
501 row_columns.push(ParsedMarkdownTableColumn {
502 col_span: 1,
503 row_span: 1,
504 is_header: in_header,
505 children: cell_contents,
506 alignment: column_alignments
507 .get(row_columns.len())
508 .copied()
509 .unwrap_or_default(),
510 });
511 }
512 Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
513 self.cursor += 1;
514 let columns = std::mem::take(&mut row_columns);
515 if in_header {
516 header.push(ParsedMarkdownTableRow { columns: columns });
517 in_header = false;
518 } else {
519 body.push(ParsedMarkdownTableRow::with_columns(columns));
520 }
521 }
522 Event::End(TagEnd::Table) => {
523 self.cursor += 1;
524 break;
525 }
526 _ => {
527 break;
528 }
529 }
530 }
531
532 ParsedMarkdownTable {
533 source_range,
534 header,
535 body,
536 }
537 }
538
539 fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
540 match alignment {
541 Alignment::None => ParsedMarkdownTableAlignment::None,
542 Alignment::Left => ParsedMarkdownTableAlignment::Left,
543 Alignment::Center => ParsedMarkdownTableAlignment::Center,
544 Alignment::Right => ParsedMarkdownTableAlignment::Right,
545 }
546 }
547
548 async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
549 let (_, list_source_range) = self.previous().unwrap();
550
551 let mut items = Vec::new();
552 let mut items_stack = vec![MarkdownListItem::default()];
553 let mut depth = 1;
554 let mut order = order;
555 let mut order_stack = Vec::new();
556
557 let mut insertion_indices = FxHashMap::default();
558 let mut source_ranges = FxHashMap::default();
559 let mut start_item_range = list_source_range.clone();
560
561 while !self.eof() {
562 let (current, source_range) = self.current().unwrap();
563 match current {
564 Event::Start(Tag::List(new_order)) => {
565 if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
566 insertion_indices.insert(depth, items.len());
567 }
568
569 // We will use the start of the nested list as the end for the current item's range,
570 // because we don't care about the hierarchy of list items
571 if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
572 e.insert(start_item_range.start..source_range.start);
573 }
574
575 order_stack.push(order);
576 order = *new_order;
577 self.cursor += 1;
578 depth += 1;
579 }
580 Event::End(TagEnd::List(_)) => {
581 order = order_stack.pop().flatten();
582 self.cursor += 1;
583 depth -= 1;
584
585 if depth == 0 {
586 break;
587 }
588 }
589 Event::Start(Tag::Item) => {
590 start_item_range = source_range.clone();
591
592 self.cursor += 1;
593 items_stack.push(MarkdownListItem::default());
594
595 let mut task_list = None;
596 // Check for task list marker (`- [ ]` or `- [x]`)
597 if let Some(event) = self.current_event() {
598 // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
599 if event == &Event::Start(Tag::Paragraph) {
600 self.cursor += 1;
601 }
602
603 if let Some((Event::TaskListMarker(checked), range)) = self.current() {
604 task_list = Some((*checked, range.clone()));
605 self.cursor += 1;
606 }
607 }
608
609 if let Some((event, range)) = self.current() {
610 // This is a plain list item.
611 // For example `- some text` or `1. [Docs](./docs.md)`
612 if MarkdownParser::is_text_like(event) {
613 let text = self.parse_text(false, Some(range.clone()));
614 let block = ParsedMarkdownElement::Paragraph(text);
615 if let Some(content) = items_stack.last_mut() {
616 let item_type = if let Some((checked, range)) = task_list {
617 ParsedMarkdownListItemType::Task(checked, range)
618 } else if let Some(order) = order {
619 ParsedMarkdownListItemType::Ordered(order)
620 } else {
621 ParsedMarkdownListItemType::Unordered
622 };
623 content.item_type = item_type;
624 content.content.push(block);
625 }
626 } else {
627 let block = self.parse_block().await;
628 if let Some(block) = block
629 && let Some(list_item) = items_stack.last_mut()
630 {
631 list_item.content.extend(block);
632 }
633 }
634 }
635
636 // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
637 if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
638 self.cursor += 1;
639 }
640 }
641 Event::End(TagEnd::Item) => {
642 self.cursor += 1;
643
644 if let Some(current) = order {
645 order = Some(current + 1);
646 }
647
648 if let Some(list_item) = items_stack.pop() {
649 let source_range = source_ranges
650 .remove(&depth)
651 .unwrap_or(start_item_range.clone());
652
653 // We need to remove the last character of the source range, because it includes the newline character
654 let source_range = source_range.start..source_range.end - 1;
655 let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
656 source_range,
657 content: list_item.content,
658 depth,
659 item_type: list_item.item_type,
660 nested: false,
661 });
662
663 if let Some(index) = insertion_indices.get(&depth) {
664 items.insert(*index, item);
665 insertion_indices.remove(&depth);
666 } else {
667 items.push(item);
668 }
669 }
670 }
671 _ => {
672 if depth == 0 {
673 break;
674 }
675 // This can only happen if a list item starts with more then one paragraph,
676 // or the list item contains blocks that should be rendered after the nested list items
677 let block = self.parse_block().await;
678 if let Some(block) = block {
679 if let Some(list_item) = items_stack.last_mut() {
680 // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
681 if !insertion_indices.contains_key(&depth) {
682 list_item.content.extend(block);
683 continue;
684 }
685 }
686
687 // Otherwise we need to insert the block after all the nested items
688 // that have been parsed so far
689 items.extend(block);
690 } else {
691 self.cursor += 1;
692 }
693 }
694 }
695 }
696
697 items
698 }
699
700 #[async_recursion]
701 async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
702 let (_event, source_range) = self.previous().unwrap();
703 let source_range = source_range.clone();
704 let mut nested_depth = 1;
705
706 let mut children: Vec<ParsedMarkdownElement> = vec![];
707
708 while !self.eof() {
709 let block = self.parse_block().await;
710
711 if let Some(block) = block {
712 children.extend(block);
713 } else {
714 break;
715 }
716
717 if self.eof() {
718 break;
719 }
720
721 let (current, _source_range) = self.current().unwrap();
722 match current {
723 // This is a nested block quote.
724 // Record that we're in a nested block quote and continue parsing.
725 // We don't need to advance the cursor since the next
726 // call to `parse_block` will handle it.
727 Event::Start(Tag::BlockQuote(_kind)) => {
728 nested_depth += 1;
729 }
730 Event::End(TagEnd::BlockQuote(_kind)) => {
731 nested_depth -= 1;
732 if nested_depth == 0 {
733 self.cursor += 1;
734 break;
735 }
736 }
737 _ => {}
738 };
739 }
740
741 ParsedMarkdownBlockQuote {
742 source_range,
743 children,
744 }
745 }
746
747 async fn parse_code_block(
748 &mut self,
749 language: Option<String>,
750 ) -> Option<ParsedMarkdownCodeBlock> {
751 let Some((_event, source_range)) = self.previous() else {
752 return None;
753 };
754
755 let source_range = source_range.clone();
756 let mut code = String::new();
757
758 while !self.eof() {
759 let Some((current, _source_range)) = self.current() else {
760 break;
761 };
762
763 match current {
764 Event::Text(text) => {
765 code.push_str(text);
766 self.cursor += 1;
767 }
768 Event::End(TagEnd::CodeBlock) => {
769 self.cursor += 1;
770 break;
771 }
772 _ => {
773 break;
774 }
775 }
776 }
777
778 code = code.strip_suffix('\n').unwrap_or(&code).to_string();
779
780 let highlights = if let Some(language) = &language {
781 if let Some(registry) = &self.language_registry {
782 let rope: language::Rope = code.as_str().into();
783 registry
784 .language_for_name_or_extension(language)
785 .await
786 .map(|l| l.highlight_text(&rope, 0..code.len()))
787 .ok()
788 } else {
789 None
790 }
791 } else {
792 None
793 };
794
795 Some(ParsedMarkdownCodeBlock {
796 source_range,
797 contents: code.into(),
798 language,
799 highlights,
800 })
801 }
802
803 async fn parse_html_block(&mut self) -> Vec<ParsedMarkdownElement> {
804 let mut elements = Vec::new();
805 let Some((_event, _source_range)) = self.previous() else {
806 return elements;
807 };
808
809 let mut html_source_range_start = None;
810 let mut html_source_range_end = None;
811 let mut html_buffer = String::new();
812
813 while !self.eof() {
814 let Some((current, source_range)) = self.current() else {
815 break;
816 };
817 let source_range = source_range.clone();
818 match current {
819 Event::Html(html) => {
820 html_source_range_start.get_or_insert(source_range.start);
821 html_source_range_end = Some(source_range.end);
822 html_buffer.push_str(html);
823 self.cursor += 1;
824 }
825 Event::End(TagEnd::CodeBlock) => {
826 self.cursor += 1;
827 break;
828 }
829 _ => {
830 break;
831 }
832 }
833 }
834
835 let bytes = cleanup_html(&html_buffer);
836
837 let mut cursor = std::io::Cursor::new(bytes);
838 if let Ok(dom) = parse_document(RcDom::default(), ParseOpts::default())
839 .from_utf8()
840 .read_from(&mut cursor)
841 && let Some((start, end)) = html_source_range_start.zip(html_source_range_end)
842 {
843 self.parse_html_node(
844 start..end,
845 &dom.document,
846 &mut elements,
847 &ParseHtmlNodeContext::default(),
848 );
849 }
850
851 elements
852 }
853
854 fn parse_html_node(
855 &self,
856 source_range: Range<usize>,
857 node: &Rc<markup5ever_rcdom::Node>,
858 elements: &mut Vec<ParsedMarkdownElement>,
859 context: &ParseHtmlNodeContext,
860 ) {
861 match &node.data {
862 markup5ever_rcdom::NodeData::Document => {
863 self.consume_children(source_range, node, elements, context);
864 }
865 markup5ever_rcdom::NodeData::Text { contents } => {
866 elements.push(ParsedMarkdownElement::Paragraph(vec![
867 MarkdownParagraphChunk::Text(ParsedMarkdownText {
868 source_range,
869 regions: Vec::default(),
870 region_ranges: Vec::default(),
871 highlights: Vec::default(),
872 contents: contents.borrow().to_string().into(),
873 }),
874 ]));
875 }
876 markup5ever_rcdom::NodeData::Comment { .. } => {}
877 markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
878 if local_name!("img") == name.local {
879 if let Some(image) = self.extract_image(source_range, attrs) {
880 elements.push(ParsedMarkdownElement::Image(image));
881 }
882 } else if local_name!("p") == name.local {
883 let mut paragraph = MarkdownParagraph::new();
884 self.parse_paragraph(source_range, node, &mut paragraph);
885
886 if !paragraph.is_empty() {
887 elements.push(ParsedMarkdownElement::Paragraph(paragraph));
888 }
889 } else if matches!(
890 name.local,
891 local_name!("h1")
892 | local_name!("h2")
893 | local_name!("h3")
894 | local_name!("h4")
895 | local_name!("h5")
896 | local_name!("h6")
897 ) {
898 let mut paragraph = MarkdownParagraph::new();
899 self.consume_paragraph(source_range.clone(), node, &mut paragraph);
900
901 if !paragraph.is_empty() {
902 elements.push(ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
903 source_range,
904 level: match name.local {
905 local_name!("h1") => HeadingLevel::H1,
906 local_name!("h2") => HeadingLevel::H2,
907 local_name!("h3") => HeadingLevel::H3,
908 local_name!("h4") => HeadingLevel::H4,
909 local_name!("h5") => HeadingLevel::H5,
910 local_name!("h6") => HeadingLevel::H6,
911 _ => unreachable!(),
912 },
913 contents: paragraph,
914 }));
915 }
916 } else if local_name!("ul") == name.local || local_name!("ol") == name.local {
917 if let Some(list_items) = self.extract_html_list(
918 node,
919 local_name!("ol") == name.local,
920 context.list_item_depth,
921 source_range,
922 ) {
923 elements.extend(list_items);
924 }
925 } else if local_name!("blockquote") == name.local {
926 if let Some(blockquote) = self.extract_html_blockquote(node, source_range) {
927 elements.push(ParsedMarkdownElement::BlockQuote(blockquote));
928 }
929 } else if local_name!("table") == name.local {
930 if let Some(table) = self.extract_html_table(node, source_range) {
931 elements.push(ParsedMarkdownElement::Table(table));
932 }
933 } else {
934 self.consume_children(source_range, node, elements, context);
935 }
936 }
937 _ => {}
938 }
939 }
940
941 fn parse_paragraph(
942 &self,
943 source_range: Range<usize>,
944 node: &Rc<markup5ever_rcdom::Node>,
945 paragraph: &mut MarkdownParagraph,
946 ) {
947 match &node.data {
948 markup5ever_rcdom::NodeData::Text { contents } => {
949 paragraph.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
950 source_range,
951 regions: Vec::default(),
952 region_ranges: Vec::default(),
953 highlights: Vec::default(),
954 contents: contents.borrow().to_string().into(),
955 }));
956 }
957 markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
958 if local_name!("img") == name.local {
959 if let Some(image) = self.extract_image(source_range, attrs) {
960 paragraph.push(MarkdownParagraphChunk::Image(image));
961 }
962 } else {
963 self.consume_paragraph(source_range, node, paragraph);
964 }
965 }
966 _ => {}
967 }
968 }
969
970 fn consume_paragraph(
971 &self,
972 source_range: Range<usize>,
973 node: &Rc<markup5ever_rcdom::Node>,
974 paragraph: &mut MarkdownParagraph,
975 ) {
976 for node in node.children.borrow().iter() {
977 self.parse_paragraph(source_range.clone(), node, paragraph);
978 }
979 }
980
981 fn parse_table_row(
982 &self,
983 source_range: Range<usize>,
984 node: &Rc<markup5ever_rcdom::Node>,
985 ) -> Option<ParsedMarkdownTableRow> {
986 let mut columns = Vec::new();
987
988 match &node.data {
989 markup5ever_rcdom::NodeData::Element { name, .. } => {
990 if local_name!("tr") != name.local {
991 return None;
992 }
993
994 for node in node.children.borrow().iter() {
995 if let Some(column) = self.parse_table_column(source_range.clone(), node) {
996 columns.push(column);
997 }
998 }
999 }
1000 _ => {}
1001 }
1002
1003 if columns.is_empty() {
1004 None
1005 } else {
1006 Some(ParsedMarkdownTableRow { columns })
1007 }
1008 }
1009
1010 fn parse_table_column(
1011 &self,
1012 source_range: Range<usize>,
1013 node: &Rc<markup5ever_rcdom::Node>,
1014 ) -> Option<ParsedMarkdownTableColumn> {
1015 match &node.data {
1016 markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
1017 if !matches!(name.local, local_name!("th") | local_name!("td")) {
1018 return None;
1019 }
1020
1021 let mut children = MarkdownParagraph::new();
1022 self.consume_paragraph(source_range, node, &mut children);
1023
1024 let is_header = matches!(name.local, local_name!("th"));
1025
1026 Some(ParsedMarkdownTableColumn {
1027 col_span: std::cmp::max(
1028 Self::attr_value(attrs, local_name!("colspan"))
1029 .and_then(|span| span.parse().ok())
1030 .unwrap_or(1),
1031 1,
1032 ),
1033 row_span: std::cmp::max(
1034 Self::attr_value(attrs, local_name!("rowspan"))
1035 .and_then(|span| span.parse().ok())
1036 .unwrap_or(1),
1037 1,
1038 ),
1039 is_header,
1040 children,
1041 alignment: Self::attr_value(attrs, local_name!("align"))
1042 .and_then(|align| match align.as_str() {
1043 "left" => Some(ParsedMarkdownTableAlignment::Left),
1044 "center" => Some(ParsedMarkdownTableAlignment::Center),
1045 "right" => Some(ParsedMarkdownTableAlignment::Right),
1046 _ => None,
1047 })
1048 .unwrap_or_else(|| {
1049 if is_header {
1050 ParsedMarkdownTableAlignment::Center
1051 } else {
1052 ParsedMarkdownTableAlignment::default()
1053 }
1054 }),
1055 })
1056 }
1057 _ => None,
1058 }
1059 }
1060
1061 fn consume_children(
1062 &self,
1063 source_range: Range<usize>,
1064 node: &Rc<markup5ever_rcdom::Node>,
1065 elements: &mut Vec<ParsedMarkdownElement>,
1066 context: &ParseHtmlNodeContext,
1067 ) {
1068 for node in node.children.borrow().iter() {
1069 self.parse_html_node(source_range.clone(), node, elements, context);
1070 }
1071 }
1072
1073 fn attr_value(
1074 attrs: &RefCell<Vec<html5ever::Attribute>>,
1075 name: html5ever::LocalName,
1076 ) -> Option<String> {
1077 attrs.borrow().iter().find_map(|attr| {
1078 if attr.name.local == name {
1079 Some(attr.value.to_string())
1080 } else {
1081 None
1082 }
1083 })
1084 }
1085
1086 fn extract_styles_from_attributes(
1087 attrs: &RefCell<Vec<html5ever::Attribute>>,
1088 ) -> HashMap<String, String> {
1089 let mut styles = HashMap::new();
1090
1091 if let Some(style) = Self::attr_value(attrs, local_name!("style")) {
1092 for decl in style.split(';') {
1093 let mut parts = decl.splitn(2, ':');
1094 if let Some((key, value)) = parts.next().zip(parts.next()) {
1095 styles.insert(
1096 key.trim().to_lowercase().to_string(),
1097 value.trim().to_string(),
1098 );
1099 }
1100 }
1101 }
1102
1103 styles
1104 }
1105
1106 fn extract_image(
1107 &self,
1108 source_range: Range<usize>,
1109 attrs: &RefCell<Vec<html5ever::Attribute>>,
1110 ) -> Option<Image> {
1111 let src = Self::attr_value(attrs, local_name!("src"))?;
1112
1113 let mut image = Image::identify(src, source_range, self.file_location_directory.clone())?;
1114
1115 if let Some(alt) = Self::attr_value(attrs, local_name!("alt")) {
1116 image.set_alt_text(alt.into());
1117 }
1118
1119 let styles = Self::extract_styles_from_attributes(attrs);
1120
1121 if let Some(width) = Self::attr_value(attrs, local_name!("width"))
1122 .or_else(|| styles.get("width").cloned())
1123 .and_then(|width| Self::parse_html_element_dimension(&width))
1124 {
1125 image.set_width(width);
1126 }
1127
1128 if let Some(height) = Self::attr_value(attrs, local_name!("height"))
1129 .or_else(|| styles.get("height").cloned())
1130 .and_then(|height| Self::parse_html_element_dimension(&height))
1131 {
1132 image.set_height(height);
1133 }
1134
1135 Some(image)
1136 }
1137
1138 fn extract_html_list(
1139 &self,
1140 node: &Rc<markup5ever_rcdom::Node>,
1141 ordered: bool,
1142 depth: u16,
1143 source_range: Range<usize>,
1144 ) -> Option<Vec<ParsedMarkdownElement>> {
1145 let mut list_items = Vec::with_capacity(node.children.borrow().len());
1146
1147 for (index, node) in node.children.borrow().iter().enumerate() {
1148 match &node.data {
1149 markup5ever_rcdom::NodeData::Element { name, .. } => {
1150 if local_name!("li") != name.local {
1151 continue;
1152 }
1153
1154 let mut content = Vec::new();
1155 self.consume_children(
1156 source_range.clone(),
1157 node,
1158 &mut content,
1159 &ParseHtmlNodeContext {
1160 list_item_depth: depth + 1,
1161 },
1162 );
1163
1164 if !content.is_empty() {
1165 list_items.push(ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
1166 depth,
1167 source_range: source_range.clone(),
1168 item_type: if ordered {
1169 ParsedMarkdownListItemType::Ordered(index as u64 + 1)
1170 } else {
1171 ParsedMarkdownListItemType::Unordered
1172 },
1173 content,
1174 nested: true,
1175 }));
1176 }
1177 }
1178 _ => {}
1179 }
1180 }
1181
1182 if list_items.is_empty() {
1183 None
1184 } else {
1185 Some(list_items)
1186 }
1187 }
1188
1189 fn parse_html_element_dimension(value: &str) -> Option<DefiniteLength> {
1190 if value.ends_with("%") {
1191 value
1192 .trim_end_matches("%")
1193 .parse::<f32>()
1194 .ok()
1195 .map(|value| relative(value / 100.))
1196 } else {
1197 value
1198 .trim_end_matches("px")
1199 .parse()
1200 .ok()
1201 .map(|value| px(value).into())
1202 }
1203 }
1204
1205 fn extract_html_blockquote(
1206 &self,
1207 node: &Rc<markup5ever_rcdom::Node>,
1208 source_range: Range<usize>,
1209 ) -> Option<ParsedMarkdownBlockQuote> {
1210 let mut children = Vec::new();
1211 self.consume_children(
1212 source_range.clone(),
1213 node,
1214 &mut children,
1215 &ParseHtmlNodeContext::default(),
1216 );
1217
1218 if children.is_empty() {
1219 None
1220 } else {
1221 Some(ParsedMarkdownBlockQuote {
1222 children,
1223 source_range,
1224 })
1225 }
1226 }
1227
1228 fn extract_html_table(
1229 &self,
1230 node: &Rc<markup5ever_rcdom::Node>,
1231 source_range: Range<usize>,
1232 ) -> Option<ParsedMarkdownTable> {
1233 let mut header_rows = Vec::new();
1234 let mut body_rows = Vec::new();
1235
1236 // node should be a thead or tbody element
1237 for node in node.children.borrow().iter() {
1238 match &node.data {
1239 markup5ever_rcdom::NodeData::Element { name, .. } => {
1240 if local_name!("thead") == name.local {
1241 // node should be a tr element
1242 for node in node.children.borrow().iter() {
1243 if let Some(row) = self.parse_table_row(source_range.clone(), node) {
1244 header_rows.push(row);
1245 }
1246 }
1247 } else if local_name!("tbody") == name.local {
1248 // node should be a tr element
1249 for node in node.children.borrow().iter() {
1250 if let Some(row) = self.parse_table_row(source_range.clone(), node) {
1251 body_rows.push(row);
1252 }
1253 }
1254 }
1255 }
1256 _ => {}
1257 }
1258 }
1259
1260 if !header_rows.is_empty() || !body_rows.is_empty() {
1261 Some(ParsedMarkdownTable {
1262 source_range,
1263 body: body_rows,
1264 header: header_rows,
1265 })
1266 } else {
1267 None
1268 }
1269 }
1270}
1271
1272#[cfg(test)]
1273mod tests {
1274 use super::*;
1275 use ParsedMarkdownListItemType::*;
1276 use core::panic;
1277 use gpui::{AbsoluteLength, BackgroundExecutor, DefiniteLength};
1278 use language::{
1279 HighlightId, Language, LanguageConfig, LanguageMatcher, LanguageRegistry, tree_sitter_rust,
1280 };
1281 use pretty_assertions::assert_eq;
1282
1283 async fn parse(input: &str) -> ParsedMarkdown {
1284 parse_markdown(input, None, None).await
1285 }
1286
1287 #[gpui::test]
1288 async fn test_headings() {
1289 let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
1290
1291 assert_eq!(
1292 parsed.children,
1293 vec![
1294 h1(text("Heading one", 2..13), 0..14),
1295 h2(text("Heading two", 17..28), 14..29),
1296 h3(text("Heading three", 33..46), 29..46),
1297 ]
1298 );
1299 }
1300
1301 #[gpui::test]
1302 async fn test_newlines_dont_new_paragraphs() {
1303 let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
1304
1305 assert_eq!(
1306 parsed.children,
1307 vec![p("Some text that is bolded and italicized", 0..46)]
1308 );
1309 }
1310
1311 #[gpui::test]
1312 async fn test_heading_with_paragraph() {
1313 let parsed = parse("# Zed\nThe editor").await;
1314
1315 assert_eq!(
1316 parsed.children,
1317 vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
1318 );
1319 }
1320
1321 #[gpui::test]
1322 async fn test_double_newlines_do_new_paragraphs() {
1323 let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
1324
1325 assert_eq!(
1326 parsed.children,
1327 vec![
1328 p("Some text that is bolded", 0..29),
1329 p("and italicized", 31..47),
1330 ]
1331 );
1332 }
1333
1334 #[gpui::test]
1335 async fn test_bold_italic_text() {
1336 let parsed = parse("Some text **that is bolded** and *italicized*").await;
1337
1338 assert_eq!(
1339 parsed.children,
1340 vec![p("Some text that is bolded and italicized", 0..45)]
1341 );
1342 }
1343
1344 #[gpui::test]
1345 async fn test_nested_bold_strikethrough_text() {
1346 let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
1347
1348 assert_eq!(parsed.children.len(), 1);
1349 assert_eq!(
1350 parsed.children[0],
1351 ParsedMarkdownElement::Paragraph(vec![MarkdownParagraphChunk::Text(
1352 ParsedMarkdownText {
1353 source_range: 0..35,
1354 contents: "Some bostrikethroughld text".into(),
1355 highlights: Vec::new(),
1356 region_ranges: Vec::new(),
1357 regions: Vec::new(),
1358 }
1359 )])
1360 );
1361
1362 let new_text = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1363 text
1364 } else {
1365 panic!("Expected a paragraph");
1366 };
1367
1368 let paragraph = if let MarkdownParagraphChunk::Text(text) = &new_text[0] {
1369 text
1370 } else {
1371 panic!("Expected a text");
1372 };
1373
1374 assert_eq!(
1375 paragraph.highlights,
1376 vec![
1377 (
1378 5..7,
1379 MarkdownHighlight::Style(MarkdownHighlightStyle {
1380 weight: FontWeight::BOLD,
1381 ..Default::default()
1382 }),
1383 ),
1384 (
1385 7..20,
1386 MarkdownHighlight::Style(MarkdownHighlightStyle {
1387 weight: FontWeight::BOLD,
1388 strikethrough: true,
1389 ..Default::default()
1390 }),
1391 ),
1392 (
1393 20..22,
1394 MarkdownHighlight::Style(MarkdownHighlightStyle {
1395 weight: FontWeight::BOLD,
1396 ..Default::default()
1397 }),
1398 ),
1399 ]
1400 );
1401 }
1402
1403 #[gpui::test]
1404 async fn test_text_with_inline_html() {
1405 let parsed = parse("This is a paragraph with an inline HTML <sometag>tag</sometag>.").await;
1406
1407 assert_eq!(
1408 parsed.children,
1409 vec![p("This is a paragraph with an inline HTML tag.", 0..63),],
1410 );
1411 }
1412
1413 #[gpui::test]
1414 async fn test_raw_links_detection() {
1415 let parsed = parse("Checkout this https://zed.dev link").await;
1416
1417 assert_eq!(
1418 parsed.children,
1419 vec![p("Checkout this https://zed.dev link", 0..34)]
1420 );
1421 }
1422
1423 #[gpui::test]
1424 async fn test_empty_image() {
1425 let parsed = parse("![]()").await;
1426
1427 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1428 text
1429 } else {
1430 panic!("Expected a paragraph");
1431 };
1432 assert_eq!(paragraph.len(), 0);
1433 }
1434
1435 #[gpui::test]
1436 async fn test_image_links_detection() {
1437 let parsed = parse("").await;
1438
1439 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1440 text
1441 } else {
1442 panic!("Expected a paragraph");
1443 };
1444 assert_eq!(
1445 paragraph[0],
1446 MarkdownParagraphChunk::Image(Image {
1447 source_range: 0..111,
1448 link: Link::Web {
1449 url: "https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png".to_string(),
1450 },
1451 alt_text: Some("test".into()),
1452 height: None,
1453 width: None,
1454 },)
1455 );
1456 }
1457
1458 #[gpui::test]
1459 async fn test_image_alt_text() {
1460 let parsed = parse("[](https://zed.dev)\n ").await;
1461
1462 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1463 text
1464 } else {
1465 panic!("Expected a paragraph");
1466 };
1467 assert_eq!(
1468 paragraph[0],
1469 MarkdownParagraphChunk::Image(Image {
1470 source_range: 0..142,
1471 link: Link::Web {
1472 url: "https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/zed-industries/zed/main/assets/badge/v0.json".to_string(),
1473 },
1474 alt_text: Some("Zed".into()),
1475 height: None,
1476 width: None,
1477 },)
1478 );
1479 }
1480
1481 #[gpui::test]
1482 async fn test_image_without_alt_text() {
1483 let parsed = parse("").await;
1484
1485 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1486 text
1487 } else {
1488 panic!("Expected a paragraph");
1489 };
1490 assert_eq!(
1491 paragraph[0],
1492 MarkdownParagraphChunk::Image(Image {
1493 source_range: 0..31,
1494 link: Link::Web {
1495 url: "http://example.com/foo.png".to_string(),
1496 },
1497 alt_text: None,
1498 height: None,
1499 width: None,
1500 },)
1501 );
1502 }
1503
1504 #[gpui::test]
1505 async fn test_image_with_alt_text_containing_formatting() {
1506 let parsed = parse("").await;
1507
1508 let ParsedMarkdownElement::Paragraph(chunks) = &parsed.children[0] else {
1509 panic!("Expected a paragraph");
1510 };
1511 assert_eq!(
1512 chunks,
1513 &[MarkdownParagraphChunk::Image(Image {
1514 source_range: 0..44,
1515 link: Link::Web {
1516 url: "http://example.com/foo.png".to_string(),
1517 },
1518 alt_text: Some("foo bar baz".into()),
1519 height: None,
1520 width: None,
1521 }),],
1522 );
1523 }
1524
1525 #[gpui::test]
1526 async fn test_images_with_text_in_between() {
1527 let parsed = parse(
1528 "\nLorem Ipsum\n",
1529 )
1530 .await;
1531
1532 let chunks = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1533 text
1534 } else {
1535 panic!("Expected a paragraph");
1536 };
1537 assert_eq!(
1538 chunks,
1539 &vec![
1540 MarkdownParagraphChunk::Image(Image {
1541 source_range: 0..81,
1542 link: Link::Web {
1543 url: "http://example.com/foo.png".to_string(),
1544 },
1545 alt_text: Some("foo".into()),
1546 height: None,
1547 width: None,
1548 }),
1549 MarkdownParagraphChunk::Text(ParsedMarkdownText {
1550 source_range: 0..81,
1551 contents: " Lorem Ipsum ".into(),
1552 highlights: Vec::new(),
1553 region_ranges: Vec::new(),
1554 regions: Vec::new(),
1555 }),
1556 MarkdownParagraphChunk::Image(Image {
1557 source_range: 0..81,
1558 link: Link::Web {
1559 url: "http://example.com/bar.png".to_string(),
1560 },
1561 alt_text: Some("bar".into()),
1562 height: None,
1563 width: None,
1564 })
1565 ]
1566 );
1567 }
1568
// Table-driven check of `parse_html_element_dimension`: percentages become
// fractions, `px` and unit-less values become pixels, anything else is `None`.
#[test]
fn test_parse_html_element_dimension() {
    let fraction = |value: f32| Some(DefiniteLength::Fraction(value));
    let pixels =
        |value: f32| Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(value))));

    let cases = [
        // Percentage values
        ("50%", fraction(0.5)),
        ("100%", fraction(1.0)),
        ("25%", fraction(0.25)),
        ("0%", fraction(0.0)),
        // Pixel values
        ("100px", pixels(100.0)),
        ("50px", pixels(50.0)),
        ("0px", pixels(0.0)),
        // Values without units are treated as pixels
        ("100", pixels(100.0)),
        ("42", pixels(42.0)),
        // Invalid values
        ("invalid", None),
        ("px", None),
        ("%", None),
        ("", None),
        ("abc%", None),
        ("abcpx", None),
        // Decimal values
        ("50.5%", fraction(0.505)),
        ("100.25px", pixels(100.25)),
        ("42.0", pixels(42.0)),
    ];

    for (input, expected) in cases {
        assert_eq!(
            MarkdownParser::parse_html_element_dimension(input),
            expected,
            "input: {input:?}"
        );
    }
}
1638
1639 #[gpui::test]
1640 async fn test_html_unordered_list() {
1641 let parsed = parse(
1642 "<ul>
1643 <li>Item 1</li>
1644 <li>Item 2</li>
1645 </ul>",
1646 )
1647 .await;
1648
1649 assert_eq!(
1650 ParsedMarkdown {
1651 children: vec![
1652 nested_list_item(
1653 0..82,
1654 1,
1655 ParsedMarkdownListItemType::Unordered,
1656 vec![ParsedMarkdownElement::Paragraph(text("Item 1", 0..82))]
1657 ),
1658 nested_list_item(
1659 0..82,
1660 1,
1661 ParsedMarkdownListItemType::Unordered,
1662 vec![ParsedMarkdownElement::Paragraph(text("Item 2", 0..82))]
1663 ),
1664 ]
1665 },
1666 parsed
1667 );
1668 }
1669
1670 #[gpui::test]
1671 async fn test_html_ordered_list() {
1672 let parsed = parse(
1673 "<ol>
1674 <li>Item 1</li>
1675 <li>Item 2</li>
1676 </ol>",
1677 )
1678 .await;
1679
1680 assert_eq!(
1681 ParsedMarkdown {
1682 children: vec![
1683 nested_list_item(
1684 0..82,
1685 1,
1686 ParsedMarkdownListItemType::Ordered(1),
1687 vec![ParsedMarkdownElement::Paragraph(text("Item 1", 0..82))]
1688 ),
1689 nested_list_item(
1690 0..82,
1691 1,
1692 ParsedMarkdownListItemType::Ordered(2),
1693 vec![ParsedMarkdownElement::Paragraph(text("Item 2", 0..82))]
1694 ),
1695 ]
1696 },
1697 parsed
1698 );
1699 }
1700
1701 #[gpui::test]
1702 async fn test_html_nested_ordered_list() {
1703 let parsed = parse(
1704 "<ol>
1705 <li>Item 1</li>
1706 <li>Item 2
1707 <ol>
1708 <li>Sub-Item 1</li>
1709 <li>Sub-Item 2</li>
1710 </ol>
1711 </li>
1712 </ol>",
1713 )
1714 .await;
1715
1716 assert_eq!(
1717 ParsedMarkdown {
1718 children: vec![
1719 nested_list_item(
1720 0..216,
1721 1,
1722 ParsedMarkdownListItemType::Ordered(1),
1723 vec![ParsedMarkdownElement::Paragraph(text("Item 1", 0..216))]
1724 ),
1725 nested_list_item(
1726 0..216,
1727 1,
1728 ParsedMarkdownListItemType::Ordered(2),
1729 vec![
1730 ParsedMarkdownElement::Paragraph(text("Item 2", 0..216)),
1731 nested_list_item(
1732 0..216,
1733 2,
1734 ParsedMarkdownListItemType::Ordered(1),
1735 vec![ParsedMarkdownElement::Paragraph(text("Sub-Item 1", 0..216))]
1736 ),
1737 nested_list_item(
1738 0..216,
1739 2,
1740 ParsedMarkdownListItemType::Ordered(2),
1741 vec![ParsedMarkdownElement::Paragraph(text("Sub-Item 2", 0..216))]
1742 ),
1743 ]
1744 ),
1745 ]
1746 },
1747 parsed
1748 );
1749 }
1750
1751 #[gpui::test]
1752 async fn test_html_nested_unordered_list() {
1753 let parsed = parse(
1754 "<ul>
1755 <li>Item 1</li>
1756 <li>Item 2
1757 <ul>
1758 <li>Sub-Item 1</li>
1759 <li>Sub-Item 2</li>
1760 </ul>
1761 </li>
1762 </ul>",
1763 )
1764 .await;
1765
1766 assert_eq!(
1767 ParsedMarkdown {
1768 children: vec![
1769 nested_list_item(
1770 0..216,
1771 1,
1772 ParsedMarkdownListItemType::Unordered,
1773 vec![ParsedMarkdownElement::Paragraph(text("Item 1", 0..216))]
1774 ),
1775 nested_list_item(
1776 0..216,
1777 1,
1778 ParsedMarkdownListItemType::Unordered,
1779 vec![
1780 ParsedMarkdownElement::Paragraph(text("Item 2", 0..216)),
1781 nested_list_item(
1782 0..216,
1783 2,
1784 ParsedMarkdownListItemType::Unordered,
1785 vec![ParsedMarkdownElement::Paragraph(text("Sub-Item 1", 0..216))]
1786 ),
1787 nested_list_item(
1788 0..216,
1789 2,
1790 ParsedMarkdownListItemType::Unordered,
1791 vec![ParsedMarkdownElement::Paragraph(text("Sub-Item 2", 0..216))]
1792 ),
1793 ]
1794 ),
1795 ]
1796 },
1797 parsed
1798 );
1799 }
1800
1801 #[gpui::test]
1802 async fn test_inline_html_image_tag() {
1803 let parsed =
1804 parse("<p>Some text<img src=\"http://example.com/foo.png\" /> some more text</p>")
1805 .await;
1806
1807 assert_eq!(
1808 ParsedMarkdown {
1809 children: vec![ParsedMarkdownElement::Paragraph(vec![
1810 MarkdownParagraphChunk::Text(ParsedMarkdownText {
1811 source_range: 0..71,
1812 contents: "Some text".into(),
1813 highlights: Default::default(),
1814 region_ranges: Default::default(),
1815 regions: Default::default()
1816 }),
1817 MarkdownParagraphChunk::Image(Image {
1818 source_range: 0..71,
1819 link: Link::Web {
1820 url: "http://example.com/foo.png".to_string(),
1821 },
1822 alt_text: None,
1823 height: None,
1824 width: None,
1825 }),
1826 MarkdownParagraphChunk::Text(ParsedMarkdownText {
1827 source_range: 0..71,
1828 contents: " some more text".into(),
1829 highlights: Default::default(),
1830 region_ranges: Default::default(),
1831 regions: Default::default()
1832 }),
1833 ])]
1834 },
1835 parsed
1836 );
1837 }
1838
1839 #[gpui::test]
1840 async fn test_html_block_quote() {
1841 let parsed = parse(
1842 "<blockquote>
1843 <p>some description</p>
1844 </blockquote>",
1845 )
1846 .await;
1847
1848 assert_eq!(
1849 ParsedMarkdown {
1850 children: vec![block_quote(
1851 vec![ParsedMarkdownElement::Paragraph(text(
1852 "some description",
1853 0..78
1854 ))],
1855 0..78,
1856 )]
1857 },
1858 parsed
1859 );
1860 }
1861
1862 #[gpui::test]
1863 async fn test_html_nested_block_quote() {
1864 let parsed = parse(
1865 "<blockquote>
1866 <p>some description</p>
1867 <blockquote>
1868 <p>second description</p>
1869 </blockquote>
1870 </blockquote>",
1871 )
1872 .await;
1873
1874 assert_eq!(
1875 ParsedMarkdown {
1876 children: vec![block_quote(
1877 vec![
1878 ParsedMarkdownElement::Paragraph(text("some description", 0..179)),
1879 block_quote(
1880 vec![ParsedMarkdownElement::Paragraph(text(
1881 "second description",
1882 0..179
1883 ))],
1884 0..179,
1885 )
1886 ],
1887 0..179,
1888 )]
1889 },
1890 parsed
1891 );
1892 }
1893
1894 #[gpui::test]
1895 async fn test_html_table() {
1896 let parsed = parse(
1897 "<table>
1898 <thead>
1899 <tr>
1900 <th>Id</th>
1901 <th>Name</th>
1902 </tr>
1903 </thead>
1904 <tbody>
1905 <tr>
1906 <td>1</td>
1907 <td>Chris</td>
1908 </tr>
1909 <tr>
1910 <td>2</td>
1911 <td>Dennis</td>
1912 </tr>
1913 </tbody>
1914 </table>",
1915 )
1916 .await;
1917
1918 assert_eq!(
1919 ParsedMarkdown {
1920 children: vec![ParsedMarkdownElement::Table(table(
1921 0..366,
1922 vec![row(vec![
1923 column(
1924 1,
1925 1,
1926 true,
1927 text("Id", 0..366),
1928 ParsedMarkdownTableAlignment::Center
1929 ),
1930 column(
1931 1,
1932 1,
1933 true,
1934 text("Name ", 0..366),
1935 ParsedMarkdownTableAlignment::Center
1936 )
1937 ])],
1938 vec![
1939 row(vec![
1940 column(
1941 1,
1942 1,
1943 false,
1944 text("1", 0..366),
1945 ParsedMarkdownTableAlignment::None
1946 ),
1947 column(
1948 1,
1949 1,
1950 false,
1951 text("Chris", 0..366),
1952 ParsedMarkdownTableAlignment::None
1953 )
1954 ]),
1955 row(vec![
1956 column(
1957 1,
1958 1,
1959 false,
1960 text("2", 0..366),
1961 ParsedMarkdownTableAlignment::None
1962 ),
1963 column(
1964 1,
1965 1,
1966 false,
1967 text("Dennis", 0..366),
1968 ParsedMarkdownTableAlignment::None
1969 )
1970 ]),
1971 ],
1972 ))],
1973 },
1974 parsed
1975 );
1976 }
1977
1978 #[gpui::test]
1979 async fn test_html_table_without_headings() {
1980 let parsed = parse(
1981 "<table>
1982 <tbody>
1983 <tr>
1984 <td>1</td>
1985 <td>Chris</td>
1986 </tr>
1987 <tr>
1988 <td>2</td>
1989 <td>Dennis</td>
1990 </tr>
1991 </tbody>
1992 </table>",
1993 )
1994 .await;
1995
1996 assert_eq!(
1997 ParsedMarkdown {
1998 children: vec![ParsedMarkdownElement::Table(table(
1999 0..240,
2000 vec![],
2001 vec![
2002 row(vec![
2003 column(
2004 1,
2005 1,
2006 false,
2007 text("1", 0..240),
2008 ParsedMarkdownTableAlignment::None
2009 ),
2010 column(
2011 1,
2012 1,
2013 false,
2014 text("Chris", 0..240),
2015 ParsedMarkdownTableAlignment::None
2016 )
2017 ]),
2018 row(vec![
2019 column(
2020 1,
2021 1,
2022 false,
2023 text("2", 0..240),
2024 ParsedMarkdownTableAlignment::None
2025 ),
2026 column(
2027 1,
2028 1,
2029 false,
2030 text("Dennis", 0..240),
2031 ParsedMarkdownTableAlignment::None
2032 )
2033 ]),
2034 ],
2035 ))],
2036 },
2037 parsed
2038 );
2039 }
2040
2041 #[gpui::test]
2042 async fn test_html_table_without_body() {
2043 let parsed = parse(
2044 "<table>
2045 <thead>
2046 <tr>
2047 <th>Id</th>
2048 <th>Name</th>
2049 </tr>
2050 </thead>
2051 </table>",
2052 )
2053 .await;
2054
2055 assert_eq!(
2056 ParsedMarkdown {
2057 children: vec![ParsedMarkdownElement::Table(table(
2058 0..150,
2059 vec![row(vec![
2060 column(
2061 1,
2062 1,
2063 true,
2064 text("Id", 0..150),
2065 ParsedMarkdownTableAlignment::Center
2066 ),
2067 column(
2068 1,
2069 1,
2070 true,
2071 text("Name", 0..150),
2072 ParsedMarkdownTableAlignment::Center
2073 )
2074 ])],
2075 vec![],
2076 ))],
2077 },
2078 parsed
2079 );
2080 }
2081
2082 #[gpui::test]
2083 async fn test_html_heading_tags() {
2084 let parsed = parse("<h1>Heading</h1><h2>Heading</h2><h3>Heading</h3><h4>Heading</h4><h5>Heading</h5><h6>Heading</h6>").await;
2085
2086 assert_eq!(
2087 ParsedMarkdown {
2088 children: vec![
2089 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2090 level: HeadingLevel::H1,
2091 source_range: 0..96,
2092 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2093 source_range: 0..96,
2094 contents: "Heading".into(),
2095 highlights: Vec::default(),
2096 region_ranges: Vec::default(),
2097 regions: Vec::default()
2098 })],
2099 }),
2100 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2101 level: HeadingLevel::H2,
2102 source_range: 0..96,
2103 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2104 source_range: 0..96,
2105 contents: "Heading".into(),
2106 highlights: Vec::default(),
2107 region_ranges: Vec::default(),
2108 regions: Vec::default()
2109 })],
2110 }),
2111 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2112 level: HeadingLevel::H3,
2113 source_range: 0..96,
2114 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2115 source_range: 0..96,
2116 contents: "Heading".into(),
2117 highlights: Vec::default(),
2118 region_ranges: Vec::default(),
2119 regions: Vec::default()
2120 })],
2121 }),
2122 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2123 level: HeadingLevel::H4,
2124 source_range: 0..96,
2125 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2126 source_range: 0..96,
2127 contents: "Heading".into(),
2128 highlights: Vec::default(),
2129 region_ranges: Vec::default(),
2130 regions: Vec::default()
2131 })],
2132 }),
2133 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2134 level: HeadingLevel::H5,
2135 source_range: 0..96,
2136 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2137 source_range: 0..96,
2138 contents: "Heading".into(),
2139 highlights: Vec::default(),
2140 region_ranges: Vec::default(),
2141 regions: Vec::default()
2142 })],
2143 }),
2144 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2145 level: HeadingLevel::H6,
2146 source_range: 0..96,
2147 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2148 source_range: 0..96,
2149 contents: "Heading".into(),
2150 highlights: Vec::default(),
2151 region_ranges: Vec::default(),
2152 regions: Vec::default()
2153 })],
2154 }),
2155 ],
2156 },
2157 parsed
2158 );
2159 }
2160
2161 #[gpui::test]
2162 async fn test_html_image_tag() {
2163 let parsed = parse("<img src=\"http://example.com/foo.png\" />").await;
2164
2165 assert_eq!(
2166 ParsedMarkdown {
2167 children: vec![ParsedMarkdownElement::Image(Image {
2168 source_range: 0..40,
2169 link: Link::Web {
2170 url: "http://example.com/foo.png".to_string(),
2171 },
2172 alt_text: None,
2173 height: None,
2174 width: None,
2175 })]
2176 },
2177 parsed
2178 );
2179 }
2180
2181 #[gpui::test]
2182 async fn test_html_image_tag_with_alt_text() {
2183 let parsed = parse("<img src=\"http://example.com/foo.png\" alt=\"Foo\" />").await;
2184
2185 assert_eq!(
2186 ParsedMarkdown {
2187 children: vec![ParsedMarkdownElement::Image(Image {
2188 source_range: 0..50,
2189 link: Link::Web {
2190 url: "http://example.com/foo.png".to_string(),
2191 },
2192 alt_text: Some("Foo".into()),
2193 height: None,
2194 width: None,
2195 })]
2196 },
2197 parsed
2198 );
2199 }
2200
2201 #[gpui::test]
2202 async fn test_html_image_tag_with_height_and_width() {
2203 let parsed =
2204 parse("<img src=\"http://example.com/foo.png\" height=\"100\" width=\"200\" />").await;
2205
2206 assert_eq!(
2207 ParsedMarkdown {
2208 children: vec![ParsedMarkdownElement::Image(Image {
2209 source_range: 0..65,
2210 link: Link::Web {
2211 url: "http://example.com/foo.png".to_string(),
2212 },
2213 alt_text: None,
2214 height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
2215 width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
2216 })]
2217 },
2218 parsed
2219 );
2220 }
2221
2222 #[gpui::test]
2223 async fn test_html_image_style_tag_with_height_and_width() {
2224 let parsed = parse(
2225 "<img src=\"http://example.com/foo.png\" style=\"height:100px; width:200px;\" />",
2226 )
2227 .await;
2228
2229 assert_eq!(
2230 ParsedMarkdown {
2231 children: vec![ParsedMarkdownElement::Image(Image {
2232 source_range: 0..75,
2233 link: Link::Web {
2234 url: "http://example.com/foo.png".to_string(),
2235 },
2236 alt_text: None,
2237 height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
2238 width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
2239 })]
2240 },
2241 parsed
2242 );
2243 }
2244
2245 #[gpui::test]
2246 async fn test_header_only_table() {
2247 let markdown = "\
2248| Header 1 | Header 2 |
2249|----------|----------|
2250
2251Some other content
2252";
2253
2254 let expected_table = table(
2255 0..48,
2256 vec![row(vec![
2257 column(
2258 1,
2259 1,
2260 true,
2261 text("Header 1", 1..11),
2262 ParsedMarkdownTableAlignment::None,
2263 ),
2264 column(
2265 1,
2266 1,
2267 true,
2268 text("Header 2", 12..22),
2269 ParsedMarkdownTableAlignment::None,
2270 ),
2271 ])],
2272 vec![],
2273 );
2274
2275 assert_eq!(
2276 parse(markdown).await.children[0],
2277 ParsedMarkdownElement::Table(expected_table)
2278 );
2279 }
2280
2281 #[gpui::test]
2282 async fn test_basic_table() {
2283 let markdown = "\
2284| Header 1 | Header 2 |
2285|----------|----------|
2286| Cell 1 | Cell 2 |
2287| Cell 3 | Cell 4 |";
2288
2289 let expected_table = table(
2290 0..95,
2291 vec![row(vec![
2292 column(
2293 1,
2294 1,
2295 true,
2296 text("Header 1", 1..11),
2297 ParsedMarkdownTableAlignment::None,
2298 ),
2299 column(
2300 1,
2301 1,
2302 true,
2303 text("Header 2", 12..22),
2304 ParsedMarkdownTableAlignment::None,
2305 ),
2306 ])],
2307 vec![
2308 row(vec![
2309 column(
2310 1,
2311 1,
2312 false,
2313 text("Cell 1", 49..59),
2314 ParsedMarkdownTableAlignment::None,
2315 ),
2316 column(
2317 1,
2318 1,
2319 false,
2320 text("Cell 2", 60..70),
2321 ParsedMarkdownTableAlignment::None,
2322 ),
2323 ]),
2324 row(vec![
2325 column(
2326 1,
2327 1,
2328 false,
2329 text("Cell 3", 73..83),
2330 ParsedMarkdownTableAlignment::None,
2331 ),
2332 column(
2333 1,
2334 1,
2335 false,
2336 text("Cell 4", 84..94),
2337 ParsedMarkdownTableAlignment::None,
2338 ),
2339 ]),
2340 ],
2341 );
2342
2343 assert_eq!(
2344 parse(markdown).await.children[0],
2345 ParsedMarkdownElement::Table(expected_table)
2346 );
2347 }
2348
2349 #[gpui::test]
2350 async fn test_list_basic() {
2351 let parsed = parse(
2352 "\
2353* Item 1
2354* Item 2
2355* Item 3
2356",
2357 )
2358 .await;
2359
2360 assert_eq!(
2361 parsed.children,
2362 vec![
2363 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
2364 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
2365 list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
2366 ],
2367 );
2368 }
2369
2370 #[gpui::test]
2371 async fn test_list_with_tasks() {
2372 let parsed = parse(
2373 "\
2374- [ ] TODO
2375- [x] Checked
2376",
2377 )
2378 .await;
2379
2380 assert_eq!(
2381 parsed.children,
2382 vec![
2383 list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
2384 list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
2385 ],
2386 );
2387 }
2388
2389 #[gpui::test]
2390 async fn test_list_with_indented_task() {
2391 let parsed = parse(
2392 "\
2393- [ ] TODO
2394 - [x] Checked
2395 - Unordered
2396 1. Number 1
2397 1. Number 2
23981. Number A
2399",
2400 )
2401 .await;
2402
2403 assert_eq!(
2404 parsed.children,
2405 vec![
2406 list_item(0..12, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
2407 list_item(13..26, 2, Task(true, 15..18), vec![p("Checked", 19..26)]),
2408 list_item(29..40, 2, Unordered, vec![p("Unordered", 31..40)]),
2409 list_item(43..54, 2, Ordered(1), vec![p("Number 1", 46..54)]),
2410 list_item(57..68, 2, Ordered(2), vec![p("Number 2", 60..68)]),
2411 list_item(69..80, 1, Ordered(1), vec![p("Number A", 72..80)]),
2412 ],
2413 );
2414 }
2415
2416 #[gpui::test]
2417 async fn test_list_with_linebreak_is_handled_correctly() {
2418 let parsed = parse(
2419 "\
2420- [ ] Task 1
2421
2422- [x] Task 2
2423",
2424 )
2425 .await;
2426
2427 assert_eq!(
2428 parsed.children,
2429 vec![
2430 list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
2431 list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
2432 ],
2433 );
2434 }
2435
2436 #[gpui::test]
2437 async fn test_list_nested() {
2438 let parsed = parse(
2439 "\
2440* Item 1
2441* Item 2
2442* Item 3
2443
24441. Hello
24451. Two
2446 1. Three
24472. Four
24483. Five
2449
2450* First
2451 1. Hello
2452 1. Goodbyte
2453 - Inner
2454 - Inner
2455 2. Goodbyte
2456 - Next item empty
2457 -
2458* Last
2459",
2460 )
2461 .await;
2462
2463 assert_eq!(
2464 parsed.children,
2465 vec![
2466 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
2467 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
2468 list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
2469 list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
2470 list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
2471 list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
2472 list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
2473 list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
2474 list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
2475 list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
2476 list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
2477 list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
2478 list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
2479 list_item(143..159, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
2480 list_item(160..180, 3, Unordered, vec![p("Next item empty", 165..180)]),
2481 list_item(186..190, 3, Unordered, vec![]),
2482 list_item(191..197, 1, Unordered, vec![p("Last", 193..197)]),
2483 ]
2484 );
2485 }
2486
2487 #[gpui::test]
2488 async fn test_list_with_nested_content() {
2489 let parsed = parse(
2490 "\
2491* This is a list item with two paragraphs.
2492
2493 This is the second paragraph in the list item.
2494",
2495 )
2496 .await;
2497
2498 assert_eq!(
2499 parsed.children,
2500 vec![list_item(
2501 0..96,
2502 1,
2503 Unordered,
2504 vec![
2505 p("This is a list item with two paragraphs.", 4..44),
2506 p("This is the second paragraph in the list item.", 50..97)
2507 ],
2508 ),],
2509 );
2510 }
2511
2512 #[gpui::test]
2513 async fn test_list_item_with_inline_html() {
2514 let parsed = parse(
2515 "\
2516* This is a list item with an inline HTML <sometag>tag</sometag>.
2517",
2518 )
2519 .await;
2520
2521 assert_eq!(
2522 parsed.children,
2523 vec![list_item(
2524 0..67,
2525 1,
2526 Unordered,
2527 vec![p("This is a list item with an inline HTML tag.", 4..44),],
2528 ),],
2529 );
2530 }
2531
2532 #[gpui::test]
2533 async fn test_nested_list_with_paragraph_inside() {
2534 let parsed = parse(
2535 "\
25361. a
2537 1. b
2538 1. c
2539
2540 text
2541
2542 1. d
2543",
2544 )
2545 .await;
2546
2547 assert_eq!(
2548 parsed.children,
2549 vec![
2550 list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
2551 list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
2552 list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
2553 p("text", 32..37),
2554 list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
2555 ],
2556 );
2557 }
2558
2559 #[gpui::test]
2560 async fn test_list_with_leading_text() {
2561 let parsed = parse(
2562 "\
2563* `code`
2564* **bold**
2565* [link](https://example.com)
2566",
2567 )
2568 .await;
2569
2570 assert_eq!(
2571 parsed.children,
2572 vec![
2573 list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
2574 list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
2575 list_item(20..49, 1, Unordered, vec![p("link", 22..49)],),
2576 ],
2577 );
2578 }
2579
2580 #[gpui::test]
2581 async fn test_simple_block_quote() {
2582 let parsed = parse("> Simple block quote with **styled text**").await;
2583
2584 assert_eq!(
2585 parsed.children,
2586 vec![block_quote(
2587 vec![p("Simple block quote with styled text", 2..41)],
2588 0..41
2589 )]
2590 );
2591 }
2592
2593 #[gpui::test]
2594 async fn test_simple_block_quote_with_multiple_lines() {
2595 let parsed = parse(
2596 "\
2597> # Heading
2598> More
2599> text
2600>
2601> More text
2602",
2603 )
2604 .await;
2605
2606 assert_eq!(
2607 parsed.children,
2608 vec![block_quote(
2609 vec![
2610 h1(text("Heading", 4..11), 2..12),
2611 p("More text", 14..26),
2612 p("More text", 30..40)
2613 ],
2614 0..40
2615 )]
2616 );
2617 }
2618
2619 #[gpui::test]
2620 async fn test_nested_block_quote() {
2621 let parsed = parse(
2622 "\
2623> A
2624>
2625> > # B
2626>
2627> C
2628
2629More text
2630",
2631 )
2632 .await;
2633
2634 assert_eq!(
2635 parsed.children,
2636 vec![
2637 block_quote(
2638 vec![
2639 p("A", 2..4),
2640 block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
2641 p("C", 18..20)
2642 ],
2643 0..20
2644 ),
2645 p("More text", 21..31)
2646 ]
2647 );
2648 }
2649
2650 #[gpui::test]
2651 async fn test_code_block() {
2652 let parsed = parse(
2653 "\
2654```
2655fn main() {
2656 return 0;
2657}
2658```
2659",
2660 )
2661 .await;
2662
2663 assert_eq!(
2664 parsed.children,
2665 vec![code_block(
2666 None,
2667 "fn main() {\n return 0;\n}",
2668 0..35,
2669 None
2670 )]
2671 );
2672 }
2673
2674 #[gpui::test]
2675 async fn test_code_block_with_language(executor: BackgroundExecutor) {
2676 let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
2677 language_registry.add(rust_lang());
2678
2679 let parsed = parse_markdown(
2680 "\
2681```rust
2682fn main() {
2683 return 0;
2684}
2685```
2686",
2687 None,
2688 Some(language_registry),
2689 )
2690 .await;
2691
2692 assert_eq!(
2693 parsed.children,
2694 vec![code_block(
2695 Some("rust".to_string()),
2696 "fn main() {\n return 0;\n}",
2697 0..39,
2698 Some(vec![])
2699 )]
2700 );
2701 }
2702
2703 fn rust_lang() -> Arc<Language> {
2704 Arc::new(Language::new(
2705 LanguageConfig {
2706 name: "Rust".into(),
2707 matcher: LanguageMatcher {
2708 path_suffixes: vec!["rs".into()],
2709 ..Default::default()
2710 },
2711 collapsed_placeholder: " /* ... */ ".to_string(),
2712 ..Default::default()
2713 },
2714 Some(tree_sitter_rust::LANGUAGE.into()),
2715 ))
2716 }
2717
2718 fn h1(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
2719 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2720 source_range,
2721 level: HeadingLevel::H1,
2722 contents,
2723 })
2724 }
2725
2726 fn h2(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
2727 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2728 source_range,
2729 level: HeadingLevel::H2,
2730 contents,
2731 })
2732 }
2733
2734 fn h3(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
2735 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2736 source_range,
2737 level: HeadingLevel::H3,
2738 contents,
2739 })
2740 }
2741
2742 fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
2743 ParsedMarkdownElement::Paragraph(text(contents, source_range))
2744 }
2745
2746 fn text(contents: &str, source_range: Range<usize>) -> MarkdownParagraph {
2747 vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2748 highlights: Vec::new(),
2749 region_ranges: Vec::new(),
2750 regions: Vec::new(),
2751 source_range,
2752 contents: contents.to_string().into(),
2753 })]
2754 }
2755
2756 fn block_quote(
2757 children: Vec<ParsedMarkdownElement>,
2758 source_range: Range<usize>,
2759 ) -> ParsedMarkdownElement {
2760 ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
2761 source_range,
2762 children,
2763 })
2764 }
2765
2766 fn code_block(
2767 language: Option<String>,
2768 code: &str,
2769 source_range: Range<usize>,
2770 highlights: Option<Vec<(Range<usize>, HighlightId)>>,
2771 ) -> ParsedMarkdownElement {
2772 ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
2773 source_range,
2774 language,
2775 contents: code.to_string().into(),
2776 highlights,
2777 })
2778 }
2779
2780 fn list_item(
2781 source_range: Range<usize>,
2782 depth: u16,
2783 item_type: ParsedMarkdownListItemType,
2784 content: Vec<ParsedMarkdownElement>,
2785 ) -> ParsedMarkdownElement {
2786 ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
2787 source_range,
2788 item_type,
2789 depth,
2790 content,
2791 nested: false,
2792 })
2793 }
2794
2795 fn nested_list_item(
2796 source_range: Range<usize>,
2797 depth: u16,
2798 item_type: ParsedMarkdownListItemType,
2799 content: Vec<ParsedMarkdownElement>,
2800 ) -> ParsedMarkdownElement {
2801 ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
2802 source_range,
2803 item_type,
2804 depth,
2805 content,
2806 nested: true,
2807 })
2808 }
2809
2810 fn table(
2811 source_range: Range<usize>,
2812 header: Vec<ParsedMarkdownTableRow>,
2813 body: Vec<ParsedMarkdownTableRow>,
2814 ) -> ParsedMarkdownTable {
2815 ParsedMarkdownTable {
2816 source_range,
2817 header,
2818 body,
2819 }
2820 }
2821
    /// Builds a table row holding the given `columns`.
    fn row(columns: Vec<ParsedMarkdownTableColumn>) -> ParsedMarkdownTableRow {
        ParsedMarkdownTableRow { columns }
    }
2825
2826 fn column(
2827 col_span: usize,
2828 row_span: usize,
2829 is_header: bool,
2830 children: MarkdownParagraph,
2831 alignment: ParsedMarkdownTableAlignment,
2832 ) -> ParsedMarkdownTableColumn {
2833 ParsedMarkdownTableColumn {
2834 col_span,
2835 row_span,
2836 is_header,
2837 children,
2838 alignment,
2839 }
2840 }
2841
2842 impl PartialEq for ParsedMarkdownTable {
2843 fn eq(&self, other: &Self) -> bool {
2844 self.source_range == other.source_range
2845 && self.header == other.header
2846 && self.body == other.body
2847 }
2848 }
2849
2850 impl PartialEq for ParsedMarkdownText {
2851 fn eq(&self, other: &Self) -> bool {
2852 self.source_range == other.source_range && self.contents == other.contents
2853 }
2854 }
2855}