markdown_parser.rs

   1use crate::markdown_elements::*;
   2use async_recursion::async_recursion;
   3use gpui::FontWeight;
   4use language::LanguageRegistry;
   5use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
   6use std::{ops::Range, path::PathBuf, sync::Arc};
   7
   8pub async fn parse_markdown(
   9    markdown_input: &str,
  10    file_location_directory: Option<PathBuf>,
  11    language_registry: Option<Arc<LanguageRegistry>>,
  12) -> ParsedMarkdown {
  13    let options = Options::all();
  14    let parser = Parser::new_ext(markdown_input, options);
  15    let parser = MarkdownParser::new(
  16        parser.into_offset_iter().collect(),
  17        file_location_directory,
  18        language_registry,
  19    );
  20    let renderer = parser.parse_document().await;
  21    ParsedMarkdown {
  22        children: renderer.parsed,
  23    }
  24}
  25
/// Cursor-based parser over a buffered `pulldown_cmark` event stream that
/// accumulates the top-level markdown blocks it recognizes.
struct MarkdownParser<'a> {
    /// Every event produced for the input, paired with the source range
    /// reported for it by `into_offset_iter`.
    tokens: Vec<(Event<'a>, Range<usize>)>,
    /// The current index in the tokens array
    cursor: usize,
    /// The blocks that we have successfully parsed so far
    parsed: Vec<ParsedMarkdownElement>,
    /// Directory passed to `Link::identify` when a link destination is parsed.
    file_location_directory: Option<PathBuf>,
    /// Registry consulted for a language when a code block names one;
    /// when `None`, code blocks get no highlights.
    language_registry: Option<Arc<LanguageRegistry>>,
}
  35
  36impl<'a> MarkdownParser<'a> {
  37    fn new(
  38        tokens: Vec<(Event<'a>, Range<usize>)>,
  39        file_location_directory: Option<PathBuf>,
  40        language_registry: Option<Arc<LanguageRegistry>>,
  41    ) -> Self {
  42        Self {
  43            tokens,
  44            file_location_directory,
  45            language_registry,
  46            cursor: 0,
  47            parsed: vec![],
  48        }
  49    }
  50
  51    fn eof(&self) -> bool {
  52        if self.tokens.is_empty() {
  53            return true;
  54        }
  55        self.cursor >= self.tokens.len() - 1
  56    }
  57
  58    fn peek(&self, steps: usize) -> Option<&(Event, Range<usize>)> {
  59        if self.eof() || (steps + self.cursor) >= self.tokens.len() {
  60            return self.tokens.last();
  61        }
  62        return self.tokens.get(self.cursor + steps);
  63    }
  64
  65    fn previous(&self) -> Option<&(Event, Range<usize>)> {
  66        if self.cursor == 0 || self.cursor > self.tokens.len() {
  67            return None;
  68        }
  69        return self.tokens.get(self.cursor - 1);
  70    }
  71
  72    fn current(&self) -> Option<&(Event, Range<usize>)> {
  73        return self.peek(0);
  74    }
  75
  76    fn is_text_like(event: &Event) -> bool {
  77        match event {
  78            Event::Text(_)
  79            // Represent an inline code block
  80            | Event::Code(_)
  81            | Event::Html(_)
  82            | Event::FootnoteReference(_)
  83            | Event::Start(Tag::Link { link_type: _, dest_url: _, title: _, id: _ })
  84            | Event::Start(Tag::Emphasis)
  85            | Event::Start(Tag::Strong)
  86            | Event::Start(Tag::Strikethrough)
  87            | Event::Start(Tag::Image { link_type: _, dest_url: _, title: _, id: _ }) => {
  88                return true;
  89            }
  90            _ => return false,
  91        }
  92    }
  93
  94    async fn parse_document(mut self) -> Self {
  95        while !self.eof() {
  96            if let Some(block) = self.parse_block().await {
  97                self.parsed.push(block);
  98            }
  99        }
 100        self
 101    }
 102
    /// Parses one block-level element starting at the token under the cursor.
    ///
    /// Recognized blocks are paragraphs, headings, tables, lists, block
    /// quotes, code blocks and horizontal rules. For any other event the
    /// cursor is still advanced by one token and `None` is returned, so
    /// callers can loop on this method without getting stuck.
    ///
    /// # Panics
    ///
    /// Panics if the token list is empty, since `current()` then has nothing
    /// to return.
    async fn parse_block(&mut self) -> Option<ParsedMarkdownElement> {
        let (current, source_range) = self.current().unwrap();
        match current {
            Event::Start(tag) => match tag {
                Tag::Paragraph => {
                    // Step past the opening tag; the sub-parser reads the
                    // block's source range back from `previous()`.
                    self.cursor += 1;
                    let text = self.parse_text(false);
                    Some(ParsedMarkdownElement::Paragraph(text))
                }
                Tag::Heading {
                    level,
                    id: _,
                    classes: _,
                    attrs: _,
                } => {
                    // Copy the level out before mutating `self`, which ends
                    // the borrow of the current token.
                    let level = *level;
                    self.cursor += 1;
                    let heading = self.parse_heading(level);
                    Some(ParsedMarkdownElement::Heading(heading))
                }
                Tag::Table(alignment) => {
                    let alignment = alignment.clone();
                    self.cursor += 1;
                    let table = self.parse_table(alignment);
                    Some(ParsedMarkdownElement::Table(table))
                }
                Tag::List(order) => {
                    let order = *order;
                    self.cursor += 1;
                    // A list reached through `parse_block` always starts at
                    // depth 1; deeper levels are assigned inside `parse_list`.
                    let list = self.parse_list(1, order).await;
                    Some(ParsedMarkdownElement::List(list))
                }
                Tag::BlockQuote => {
                    self.cursor += 1;
                    let block_quote = self.parse_block_quote().await;
                    Some(ParsedMarkdownElement::BlockQuote(block_quote))
                }
                Tag::CodeBlock(kind) => {
                    // Indented blocks and fences with an empty info string
                    // both carry no language.
                    let language = match kind {
                        pulldown_cmark::CodeBlockKind::Indented => None,
                        pulldown_cmark::CodeBlockKind::Fenced(language) => {
                            if language.is_empty() {
                                None
                            } else {
                                Some(language.to_string())
                            }
                        }
                    };

                    self.cursor += 1;

                    let code_block = self.parse_code_block(language).await;
                    Some(ParsedMarkdownElement::CodeBlock(code_block))
                }
                // Any other opening tag is skipped without producing a block.
                _ => {
                    self.cursor += 1;
                    None
                }
            },
            Event::Rule => {
                let source_range = source_range.clone();
                self.cursor += 1;
                Some(ParsedMarkdownElement::HorizontalRule(source_range))
            }
            // Closing tags and stray inline events are skipped.
            _ => {
                self.cursor += 1;
                None
            }
        }
    }
 173
 174    fn parse_text(&mut self, should_complete_on_soft_break: bool) -> ParsedMarkdownText {
 175        let (_current, source_range) = self.previous().unwrap();
 176        let source_range = source_range.clone();
 177
 178        let mut text = String::new();
 179        let mut bold_depth = 0;
 180        let mut italic_depth = 0;
 181        let mut strikethrough_depth = 0;
 182        let mut link: Option<Link> = None;
 183        let mut region_ranges: Vec<Range<usize>> = vec![];
 184        let mut regions: Vec<ParsedRegion> = vec![];
 185        let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
 186
 187        loop {
 188            if self.eof() {
 189                break;
 190            }
 191
 192            let (current, _source_range) = self.current().unwrap();
 193            let prev_len = text.len();
 194            match current {
 195                Event::SoftBreak => {
 196                    if should_complete_on_soft_break {
 197                        break;
 198                    }
 199
 200                    // `Some text\nSome more text` should be treated as a single line.
 201                    text.push(' ');
 202                }
 203
 204                Event::HardBreak => {
 205                    break;
 206                }
 207
 208                Event::Text(t) => {
 209                    text.push_str(t.as_ref());
 210
 211                    let mut style = MarkdownHighlightStyle::default();
 212
 213                    if bold_depth > 0 {
 214                        style.weight = FontWeight::BOLD;
 215                    }
 216
 217                    if italic_depth > 0 {
 218                        style.italic = true;
 219                    }
 220
 221                    if strikethrough_depth > 0 {
 222                        style.strikethrough = true;
 223                    }
 224
 225                    if let Some(link) = link.clone() {
 226                        region_ranges.push(prev_len..text.len());
 227                        regions.push(ParsedRegion {
 228                            code: false,
 229                            link: Some(link),
 230                        });
 231                        style.underline = true;
 232                    }
 233
 234                    if style != MarkdownHighlightStyle::default() {
 235                        let mut new_highlight = true;
 236                        if let Some((last_range, MarkdownHighlight::Style(last_style))) =
 237                            highlights.last_mut()
 238                        {
 239                            if last_range.end == prev_len && last_style == &style {
 240                                last_range.end = text.len();
 241                                new_highlight = false;
 242                            }
 243                        }
 244                        if new_highlight {
 245                            let range = prev_len..text.len();
 246                            highlights.push((range, MarkdownHighlight::Style(style)));
 247                        }
 248                    }
 249                }
 250
 251                // Note: This event means "inline code" and not "code block"
 252                Event::Code(t) => {
 253                    text.push_str(t.as_ref());
 254                    region_ranges.push(prev_len..text.len());
 255
 256                    if link.is_some() {
 257                        highlights.push((
 258                            prev_len..text.len(),
 259                            MarkdownHighlight::Style(MarkdownHighlightStyle {
 260                                underline: true,
 261                                ..Default::default()
 262                            }),
 263                        ));
 264                    }
 265
 266                    regions.push(ParsedRegion {
 267                        code: true,
 268                        link: link.clone(),
 269                    });
 270                }
 271
 272                Event::Start(tag) => match tag {
 273                    Tag::Emphasis => italic_depth += 1,
 274                    Tag::Strong => bold_depth += 1,
 275                    Tag::Strikethrough => strikethrough_depth += 1,
 276                    Tag::Link {
 277                        link_type: _,
 278                        dest_url,
 279                        title: _,
 280                        id: _,
 281                    } => {
 282                        link = Link::identify(
 283                            self.file_location_directory.clone(),
 284                            dest_url.to_string(),
 285                        );
 286                    }
 287                    _ => {
 288                        break;
 289                    }
 290                },
 291
 292                Event::End(tag) => match tag {
 293                    TagEnd::Emphasis => {
 294                        italic_depth -= 1;
 295                    }
 296                    TagEnd::Strong => {
 297                        bold_depth -= 1;
 298                    }
 299                    TagEnd::Strikethrough => {
 300                        strikethrough_depth -= 1;
 301                    }
 302                    TagEnd::Link => {
 303                        link = None;
 304                    }
 305                    TagEnd::Paragraph => {
 306                        self.cursor += 1;
 307                        break;
 308                    }
 309                    _ => {
 310                        break;
 311                    }
 312                },
 313
 314                _ => {
 315                    break;
 316                }
 317            }
 318
 319            self.cursor += 1;
 320        }
 321
 322        ParsedMarkdownText {
 323            source_range,
 324            contents: text,
 325            highlights,
 326            regions,
 327            region_ranges,
 328        }
 329    }
 330
 331    fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
 332        let (_event, source_range) = self.previous().unwrap();
 333        let source_range = source_range.clone();
 334        let text = self.parse_text(true);
 335
 336        // Advance past the heading end tag
 337        self.cursor += 1;
 338
 339        ParsedMarkdownHeading {
 340            source_range: source_range.clone(),
 341            level: match level {
 342                pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
 343                pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
 344                pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
 345                pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
 346                pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
 347                pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
 348            },
 349            contents: text,
 350        }
 351    }
 352
 353    fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
 354        let (_event, source_range) = self.previous().unwrap();
 355        let source_range = source_range.clone();
 356        let mut header = ParsedMarkdownTableRow::new();
 357        let mut body = vec![];
 358        let mut current_row = vec![];
 359        let mut in_header = true;
 360        let column_alignments = alignment
 361            .iter()
 362            .map(|a| Self::convert_alignment(a))
 363            .collect();
 364
 365        loop {
 366            if self.eof() {
 367                break;
 368            }
 369
 370            let (current, _source_range) = self.current().unwrap();
 371            match current {
 372                Event::Start(Tag::TableHead)
 373                | Event::Start(Tag::TableRow)
 374                | Event::End(TagEnd::TableCell) => {
 375                    self.cursor += 1;
 376                }
 377                Event::Start(Tag::TableCell) => {
 378                    self.cursor += 1;
 379                    let cell_contents = self.parse_text(false);
 380                    current_row.push(cell_contents);
 381                }
 382                Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
 383                    self.cursor += 1;
 384                    let new_row = std::mem::replace(&mut current_row, vec![]);
 385                    if in_header {
 386                        header.children = new_row;
 387                        in_header = false;
 388                    } else {
 389                        let row = ParsedMarkdownTableRow::with_children(new_row);
 390                        body.push(row);
 391                    }
 392                }
 393                Event::End(TagEnd::Table) => {
 394                    self.cursor += 1;
 395                    break;
 396                }
 397                _ => {
 398                    break;
 399                }
 400            }
 401        }
 402
 403        ParsedMarkdownTable {
 404            source_range,
 405            header,
 406            body,
 407            column_alignments,
 408        }
 409    }
 410
 411    fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
 412        match alignment {
 413            Alignment::None => ParsedMarkdownTableAlignment::None,
 414            Alignment::Left => ParsedMarkdownTableAlignment::Left,
 415            Alignment::Center => ParsedMarkdownTableAlignment::Center,
 416            Alignment::Right => ParsedMarkdownTableAlignment::Right,
 417        }
 418    }
 419
    /// Parses a list whose opening tag has just been consumed.
    ///
    /// `depth` is recorded on every item of this list; nested lists found
    /// directly in this loop are parsed recursively with `depth + 1`.
    /// `order` is the number of the first item for an ordered list (`None`
    /// for an unordered one) and is incremented after each item.
    ///
    /// Parsing ends after consuming `End(List)`, or, without consuming
    /// anything, at an unexpected event seen between items.
    #[async_recursion]
    async fn parse_list(&mut self, depth: u16, order: Option<u64>) -> ParsedMarkdownList {
        let (_event, source_range) = self.previous().unwrap();
        let source_range = source_range.clone();
        let mut children = vec![];
        let mut inside_list_item = false;
        let mut order = order;
        // `Some(checked)` while the current item started with a task marker.
        let mut task_item = None;

        // Blocks collected for the item currently being parsed; moved into
        // `children` when the item's end tag is reached.
        let mut current_list_items: Vec<Box<ParsedMarkdownElement>> = vec![];

        while !self.eof() {
            let (current, _source_range) = self.current().unwrap();
            match current {
                // A nested list becomes part of the current item's contents.
                Event::Start(Tag::List(order)) => {
                    let order = *order;
                    self.cursor += 1;

                    let inner_list = self.parse_list(depth + 1, order).await;
                    let block = ParsedMarkdownElement::List(inner_list);
                    current_list_items.push(Box::new(block));
                }
                Event::End(TagEnd::List(_)) => {
                    self.cursor += 1;
                    break;
                }
                Event::Start(Tag::Item) => {
                    self.cursor += 1;
                    inside_list_item = true;

                    // Check for task list marker (`- [ ]` or `- [x]`)
                    if let Some(next) = self.current() {
                        match next.0 {
                            Event::TaskListMarker(checked) => {
                                task_item = Some(checked);
                                self.cursor += 1;
                            }
                            _ => {}
                        }
                    }

                    if let Some(next) = self.current() {
                        // This is a plain list item.
                        // For example `- some text` or `1. [Docs](./docs.md)`
                        if MarkdownParser::is_text_like(&next.0) {
                            // `parse_text` takes its source range from the
                            // previous token, which here is either the item's
                            // opening tag or the task marker just consumed.
                            let text = self.parse_text(false);
                            let block = ParsedMarkdownElement::Paragraph(text);
                            current_list_items.push(Box::new(block));
                        } else {
                            // Otherwise the item starts with a block of its
                            // own, for example a paragraph tag.
                            let block = self.parse_block().await;
                            if let Some(block) = block {
                                current_list_items.push(Box::new(block));
                            }
                        }
                    }
                }
                Event::End(TagEnd::Item) => {
                    self.cursor += 1;

                    // A task marker takes precedence over the ordered and
                    // unordered item types.
                    let item_type = if let Some(checked) = task_item {
                        ParsedMarkdownListItemType::Task(checked)
                    } else if let Some(order) = order {
                        ParsedMarkdownListItemType::Ordered(order)
                    } else {
                        ParsedMarkdownListItemType::Unordered
                    };

                    // The counter advances for every item of an ordered
                    // list, including task items.
                    if let Some(current) = order {
                        order = Some(current + 1);
                    }

                    let contents = std::mem::replace(&mut current_list_items, vec![]);

                    children.push(ParsedMarkdownListItem {
                        contents,
                        depth,
                        item_type,
                    });

                    inside_list_item = false;
                    task_item = None;
                }
                _ => {
                    // Outside of an item nothing else belongs to the list,
                    // so hand the event back to the caller.
                    if !inside_list_item {
                        break;
                    }

                    // Further blocks inside the current item.
                    let block = self.parse_block().await;
                    if let Some(block) = block {
                        current_list_items.push(Box::new(block));
                    }
                }
            }
        }

        ParsedMarkdownList {
            source_range,
            children,
        }
    }
 520
    /// Parses the blocks of a block quote whose opening tag has just been
    /// consumed.
    ///
    /// Blocks are collected through `parse_block` until it yields `None`
    /// (which includes the case where it skipped a closing tag), the token
    /// stream ends, or the matching `End(BlockQuote)` is consumed here.
    #[async_recursion]
    async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
        let (_event, source_range) = self.previous().unwrap();
        let source_range = source_range.clone();
        // NOTE(review): a nested quote is parsed by a recursive call (via
        // `parse_block`) that appears to consume its own `End(BlockQuote)`,
        // so after a nested quote this counter looks like it never gets back
        // to 0; the outer closing tag would then be consumed through the
        // `None` branch below instead. Confirm this is intended.
        let mut nested_depth = 1;

        let mut children: Vec<Box<ParsedMarkdownElement>> = vec![];

        while !self.eof() {
            let block = self.parse_block().await;

            if let Some(block) = block {
                children.push(Box::new(block));
            } else {
                // `parse_block` skipped a token it could not turn into a
                // block; the quote is treated as finished.
                break;
            }

            if self.eof() {
                break;
            }

            let (current, _source_range) = self.current().unwrap();
            match current {
                // This is a nested block quote.
                // Record that we're in a nested block quote and continue parsing.
                // We don't need to advance the cursor since the next
                // call to `parse_block` will handle it.
                Event::Start(Tag::BlockQuote) => {
                    nested_depth += 1;
                }
                Event::End(TagEnd::BlockQuote) => {
                    nested_depth -= 1;
                    // Only the closing tag that balances this quote is
                    // consumed here.
                    if nested_depth == 0 {
                        self.cursor += 1;
                        break;
                    }
                }
                _ => {}
            };
        }

        ParsedMarkdownBlockQuote {
            source_range,
            children,
        }
    }
 567
 568    async fn parse_code_block(&mut self, language: Option<String>) -> ParsedMarkdownCodeBlock {
 569        let (_event, source_range) = self.previous().unwrap();
 570        let source_range = source_range.clone();
 571        let mut code = String::new();
 572
 573        while !self.eof() {
 574            let (current, _source_range) = self.current().unwrap();
 575            match current {
 576                Event::Text(text) => {
 577                    code.push_str(&text);
 578                    self.cursor += 1;
 579                }
 580                Event::End(TagEnd::CodeBlock) => {
 581                    self.cursor += 1;
 582                    break;
 583                }
 584                _ => {
 585                    break;
 586                }
 587            }
 588        }
 589
 590        let highlights = if let Some(language) = &language {
 591            if let Some(registry) = &self.language_registry {
 592                let rope: language::Rope = code.as_str().into();
 593                registry
 594                    .language_for_name_or_extension(language)
 595                    .await
 596                    .map(|l| l.highlight_text(&rope, 0..code.len()))
 597                    .ok()
 598            } else {
 599                None
 600            }
 601        } else {
 602            None
 603        };
 604
 605        ParsedMarkdownCodeBlock {
 606            source_range,
 607            contents: code.trim().to_string().into(),
 608            language,
 609            highlights,
 610        }
 611    }
 612}
 613
 614#[cfg(test)]
 615mod tests {
 616    use super::*;
 617
 618    use gpui::BackgroundExecutor;
 619    use language::{tree_sitter_rust, HighlightId, Language, LanguageConfig, LanguageMatcher};
 620    use pretty_assertions::assert_eq;
 621
 622    use ParsedMarkdownElement::*;
 623    use ParsedMarkdownListItemType::*;
 624
 625    async fn parse(input: &str) -> ParsedMarkdown {
 626        parse_markdown(input, None, None).await
 627    }
 628
 629    #[gpui::test]
 630    async fn test_headings() {
 631        let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
 632
 633        assert_eq!(
 634            parsed.children,
 635            vec![
 636                h1(text("Heading one", 0..14), 0..14),
 637                h2(text("Heading two", 14..29), 14..29),
 638                h3(text("Heading three", 29..46), 29..46),
 639            ]
 640        );
 641    }
 642
 643    #[gpui::test]
 644    async fn test_newlines_dont_new_paragraphs() {
 645        let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
 646
 647        assert_eq!(
 648            parsed.children,
 649            vec![p("Some text that is bolded and italicized", 0..46)]
 650        );
 651    }
 652
 653    #[gpui::test]
 654    async fn test_heading_with_paragraph() {
 655        let parsed = parse("# Zed\nThe editor").await;
 656
 657        assert_eq!(
 658            parsed.children,
 659            vec![h1(text("Zed", 0..6), 0..6), p("The editor", 6..16),]
 660        );
 661    }
 662
 663    #[gpui::test]
 664    async fn test_double_newlines_do_new_paragraphs() {
 665        let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
 666
 667        assert_eq!(
 668            parsed.children,
 669            vec![
 670                p("Some text that is bolded", 0..29),
 671                p("and italicized", 31..47),
 672            ]
 673        );
 674    }
 675
 676    #[gpui::test]
 677    async fn test_bold_italic_text() {
 678        let parsed = parse("Some text **that is bolded** and *italicized*").await;
 679
 680        assert_eq!(
 681            parsed.children,
 682            vec![p("Some text that is bolded and italicized", 0..45)]
 683        );
 684    }
 685
 686    #[gpui::test]
 687    async fn test_nested_bold_strikethrough_text() {
 688        let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
 689
 690        assert_eq!(parsed.children.len(), 1);
 691        assert_eq!(
 692            parsed.children[0],
 693            ParsedMarkdownElement::Paragraph(ParsedMarkdownText {
 694                source_range: 0..35,
 695                contents: "Some bostrikethroughld text".to_string(),
 696                highlights: Vec::new(),
 697                region_ranges: Vec::new(),
 698                regions: Vec::new(),
 699            })
 700        );
 701
 702        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 703            text
 704        } else {
 705            panic!("Expected a paragraph");
 706        };
 707        assert_eq!(
 708            paragraph.highlights,
 709            vec![
 710                (
 711                    5..7,
 712                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 713                        weight: FontWeight::BOLD,
 714                        ..Default::default()
 715                    }),
 716                ),
 717                (
 718                    7..20,
 719                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 720                        weight: FontWeight::BOLD,
 721                        strikethrough: true,
 722                        ..Default::default()
 723                    }),
 724                ),
 725                (
 726                    20..22,
 727                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 728                        weight: FontWeight::BOLD,
 729                        ..Default::default()
 730                    }),
 731                ),
 732            ]
 733        );
 734    }
 735
 736    #[gpui::test]
 737    async fn test_header_only_table() {
 738        let markdown = "\
 739| Header 1 | Header 2 |
 740|----------|----------|
 741
 742Some other content
 743";
 744
 745        let expected_table = table(
 746            0..48,
 747            row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
 748            vec![],
 749        );
 750
 751        assert_eq!(
 752            parse(markdown).await.children[0],
 753            ParsedMarkdownElement::Table(expected_table)
 754        );
 755    }
 756
 757    #[gpui::test]
 758    async fn test_basic_table() {
 759        let markdown = "\
 760| Header 1 | Header 2 |
 761|----------|----------|
 762| Cell 1   | Cell 2   |
 763| Cell 3   | Cell 4   |";
 764
 765        let expected_table = table(
 766            0..95,
 767            row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
 768            vec![
 769                row(vec![text("Cell 1", 49..59), text("Cell 2", 60..70)]),
 770                row(vec![text("Cell 3", 73..83), text("Cell 4", 84..94)]),
 771            ],
 772        );
 773
 774        assert_eq!(
 775            parse(markdown).await.children[0],
 776            ParsedMarkdownElement::Table(expected_table)
 777        );
 778    }
 779
 780    #[gpui::test]
 781    async fn test_list_basic() {
 782        let parsed = parse(
 783            "\
 784* Item 1
 785* Item 2
 786* Item 3
 787",
 788        )
 789        .await;
 790
 791        assert_eq!(
 792            parsed.children,
 793            vec![list(
 794                vec![
 795                    list_item(1, Unordered, vec![p("Item 1", 0..9)]),
 796                    list_item(1, Unordered, vec![p("Item 2", 9..18)]),
 797                    list_item(1, Unordered, vec![p("Item 3", 18..27)]),
 798                ],
 799                0..27
 800            ),]
 801        );
 802    }
 803
 804    #[gpui::test]
 805    async fn test_list_with_tasks() {
 806        let parsed = parse(
 807            "\
 808- [ ] TODO
 809- [x] Checked
 810",
 811        )
 812        .await;
 813
 814        assert_eq!(
 815            parsed.children,
 816            vec![list(
 817                vec![
 818                    list_item(1, Task(false), vec![p("TODO", 2..5)]),
 819                    list_item(1, Task(true), vec![p("Checked", 13..16)]),
 820                ],
 821                0..25
 822            ),]
 823        );
 824    }
 825
 826    #[gpui::test]
 827    async fn test_list_nested() {
 828        let parsed = parse(
 829            "\
 830* Item 1
 831* Item 2
 832* Item 3
 833
 8341. Hello
 8351. Two
 836   1. Three
 8372. Four
 8383. Five
 839
 840* First
 841  1. Hello
 842     1. Goodbyte
 843        - Inner
 844        - Inner
 845  2. Goodbyte
 846* Last
 847",
 848        )
 849        .await;
 850
 851        assert_eq!(
 852            parsed.children,
 853            vec![
 854                list(
 855                    vec![
 856                        list_item(1, Unordered, vec![p("Item 1", 0..9)]),
 857                        list_item(1, Unordered, vec![p("Item 2", 9..18)]),
 858                        list_item(1, Unordered, vec![p("Item 3", 18..28)]),
 859                    ],
 860                    0..28
 861                ),
 862                list(
 863                    vec![
 864                        list_item(1, Ordered(1), vec![p("Hello", 28..37)]),
 865                        list_item(
 866                            1,
 867                            Ordered(2),
 868                            vec![
 869                                p("Two", 37..56),
 870                                list(
 871                                    vec![list_item(2, Ordered(1), vec![p("Three", 47..56)]),],
 872                                    47..56
 873                                ),
 874                            ]
 875                        ),
 876                        list_item(1, Ordered(3), vec![p("Four", 56..64)]),
 877                        list_item(1, Ordered(4), vec![p("Five", 64..73)]),
 878                    ],
 879                    28..73
 880                ),
 881                list(
 882                    vec![
 883                        list_item(
 884                            1,
 885                            Unordered,
 886                            vec![
 887                                p("First", 73..155),
 888                                list(
 889                                    vec![
 890                                        list_item(
 891                                            2,
 892                                            Ordered(1),
 893                                            vec![
 894                                                p("Hello", 83..141),
 895                                                list(
 896                                                    vec![list_item(
 897                                                        3,
 898                                                        Ordered(1),
 899                                                        vec![
 900                                                            p("Goodbyte", 97..141),
 901                                                            list(
 902                                                                vec![
 903                                                                    list_item(
 904                                                                        4,
 905                                                                        Unordered,
 906                                                                        vec![p("Inner", 117..125)]
 907                                                                    ),
 908                                                                    list_item(
 909                                                                        4,
 910                                                                        Unordered,
 911                                                                        vec![p("Inner", 133..141)]
 912                                                                    ),
 913                                                                ],
 914                                                                117..141
 915                                                            )
 916                                                        ]
 917                                                    ),],
 918                                                    97..141
 919                                                )
 920                                            ]
 921                                        ),
 922                                        list_item(2, Ordered(2), vec![p("Goodbyte", 143..155)]),
 923                                    ],
 924                                    83..155
 925                                )
 926                            ]
 927                        ),
 928                        list_item(1, Unordered, vec![p("Last", 155..162)]),
 929                    ],
 930                    73..162
 931                ),
 932            ]
 933        );
 934    }
 935
 936    #[gpui::test]
 937    async fn test_list_with_nested_content() {
 938        let parsed = parse(
 939            "\
 940*   This is a list item with two paragraphs.
 941
 942    This is the second paragraph in the list item.",
 943        )
 944        .await;
 945
 946        assert_eq!(
 947            parsed.children,
 948            vec![list(
 949                vec![list_item(
 950                    1,
 951                    Unordered,
 952                    vec![
 953                        p("This is a list item with two paragraphs.", 4..45),
 954                        p("This is the second paragraph in the list item.", 50..96)
 955                    ],
 956                ),],
 957                0..96,
 958            ),]
 959        );
 960    }
 961
 962    #[gpui::test]
 963    async fn test_list_with_leading_text() {
 964        let parsed = parse(
 965            "\
 966* `code`
 967* **bold**
 968* [link](https://example.com)
 969",
 970        )
 971        .await;
 972
 973        assert_eq!(
 974            parsed.children,
 975            vec![list(
 976                vec![
 977                    list_item(1, Unordered, vec![p("code", 0..9)],),
 978                    list_item(1, Unordered, vec![p("bold", 9..20)]),
 979                    list_item(1, Unordered, vec![p("link", 20..50)],)
 980                ],
 981                0..50,
 982            ),]
 983        );
 984    }
 985
 986    #[gpui::test]
 987    async fn test_simple_block_quote() {
 988        let parsed = parse("> Simple block quote with **styled text**").await;
 989
 990        assert_eq!(
 991            parsed.children,
 992            vec![block_quote(
 993                vec![p("Simple block quote with styled text", 2..41)],
 994                0..41
 995            )]
 996        );
 997    }
 998
 999    #[gpui::test]
1000    async fn test_simple_block_quote_with_multiple_lines() {
1001        let parsed = parse(
1002            "\
1003> # Heading
1004> More
1005> text
1006>
1007> More text
1008",
1009        )
1010        .await;
1011
1012        assert_eq!(
1013            parsed.children,
1014            vec![block_quote(
1015                vec![
1016                    h1(text("Heading", 2..12), 2..12),
1017                    p("More text", 14..26),
1018                    p("More text", 30..40)
1019                ],
1020                0..40
1021            )]
1022        );
1023    }
1024
1025    #[gpui::test]
1026    async fn test_nested_block_quote() {
1027        let parsed = parse(
1028            "\
1029> A
1030>
1031> > # B
1032>
1033> C
1034
1035More text
1036",
1037        )
1038        .await;
1039
1040        assert_eq!(
1041            parsed.children,
1042            vec![
1043                block_quote(
1044                    vec![
1045                        p("A", 2..4),
1046                        block_quote(vec![h1(text("B", 10..14), 10..14)], 8..14),
1047                        p("C", 18..20)
1048                    ],
1049                    0..20
1050                ),
1051                p("More text", 21..31)
1052            ]
1053        );
1054    }
1055
1056    #[gpui::test]
1057    async fn test_code_block() {
1058        let parsed = parse(
1059            "\
1060```
1061fn main() {
1062    return 0;
1063}
1064```
1065",
1066        )
1067        .await;
1068
1069        assert_eq!(
1070            parsed.children,
1071            vec![code_block(
1072                None,
1073                "fn main() {\n    return 0;\n}",
1074                0..35,
1075                None
1076            )]
1077        );
1078    }
1079
1080    #[gpui::test]
1081    async fn test_code_block_with_language(executor: BackgroundExecutor) {
1082        let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
1083        language_registry.add(rust_lang());
1084
1085        let parsed = parse_markdown(
1086            "\
1087```rust
1088fn main() {
1089    return 0;
1090}
1091```
1092",
1093            None,
1094            Some(language_registry),
1095        )
1096        .await;
1097
1098        assert_eq!(
1099            parsed.children,
1100            vec![code_block(
1101                Some("rust".to_string()),
1102                "fn main() {\n    return 0;\n}",
1103                0..39,
1104                Some(vec![])
1105            )]
1106        );
1107    }
1108
1109    fn rust_lang() -> Arc<Language> {
1110        Arc::new(Language::new(
1111            LanguageConfig {
1112                name: "Rust".into(),
1113                matcher: LanguageMatcher {
1114                    path_suffixes: vec!["rs".into()],
1115                    ..Default::default()
1116                },
1117                collapsed_placeholder: " /* ... */ ".to_string(),
1118                ..Default::default()
1119            },
1120            Some(tree_sitter_rust::language()),
1121        ))
1122    }
1123
1124    fn h1(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1125        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1126            source_range,
1127            level: HeadingLevel::H1,
1128            contents,
1129        })
1130    }
1131
1132    fn h2(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1133        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1134            source_range,
1135            level: HeadingLevel::H2,
1136            contents,
1137        })
1138    }
1139
1140    fn h3(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1141        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1142            source_range,
1143            level: HeadingLevel::H3,
1144            contents,
1145        })
1146    }
1147
1148    fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
1149        ParsedMarkdownElement::Paragraph(text(contents, source_range))
1150    }
1151
1152    fn text(contents: &str, source_range: Range<usize>) -> ParsedMarkdownText {
1153        ParsedMarkdownText {
1154            highlights: Vec::new(),
1155            region_ranges: Vec::new(),
1156            regions: Vec::new(),
1157            source_range,
1158            contents: contents.to_string(),
1159        }
1160    }
1161
1162    fn block_quote(
1163        children: Vec<ParsedMarkdownElement>,
1164        source_range: Range<usize>,
1165    ) -> ParsedMarkdownElement {
1166        ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
1167            source_range,
1168            children: children.into_iter().map(Box::new).collect(),
1169        })
1170    }
1171
1172    fn code_block(
1173        language: Option<String>,
1174        code: &str,
1175        source_range: Range<usize>,
1176        highlights: Option<Vec<(Range<usize>, HighlightId)>>,
1177    ) -> ParsedMarkdownElement {
1178        ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
1179            source_range,
1180            language,
1181            contents: code.to_string().into(),
1182            highlights,
1183        })
1184    }
1185
1186    fn list(
1187        children: Vec<ParsedMarkdownListItem>,
1188        source_range: Range<usize>,
1189    ) -> ParsedMarkdownElement {
1190        List(ParsedMarkdownList {
1191            source_range,
1192            children,
1193        })
1194    }
1195
1196    fn list_item(
1197        depth: u16,
1198        item_type: ParsedMarkdownListItemType,
1199        contents: Vec<ParsedMarkdownElement>,
1200    ) -> ParsedMarkdownListItem {
1201        ParsedMarkdownListItem {
1202            item_type,
1203            depth,
1204            contents: contents.into_iter().map(Box::new).collect(),
1205        }
1206    }
1207
1208    fn table(
1209        source_range: Range<usize>,
1210        header: ParsedMarkdownTableRow,
1211        body: Vec<ParsedMarkdownTableRow>,
1212    ) -> ParsedMarkdownTable {
1213        ParsedMarkdownTable {
1214            column_alignments: Vec::new(),
1215            source_range,
1216            header,
1217            body,
1218        }
1219    }
1220
1221    fn row(children: Vec<ParsedMarkdownText>) -> ParsedMarkdownTableRow {
1222        ParsedMarkdownTableRow { children }
1223    }
1224
1225    impl PartialEq for ParsedMarkdownTable {
1226        fn eq(&self, other: &Self) -> bool {
1227            self.source_range == other.source_range
1228                && self.header == other.header
1229                && self.body == other.body
1230        }
1231    }
1232
1233    impl PartialEq for ParsedMarkdownText {
1234        fn eq(&self, other: &Self) -> bool {
1235            self.source_range == other.source_range && self.contents == other.contents
1236        }
1237    }
1238}