markdown_parser.rs

   1use crate::markdown_elements::*;
   2use async_recursion::async_recursion;
   3use gpui::FontWeight;
   4use language::LanguageRegistry;
   5use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
   6use std::{ops::Range, path::PathBuf, sync::Arc};
   7
   8pub async fn parse_markdown(
   9    markdown_input: &str,
  10    file_location_directory: Option<PathBuf>,
  11    language_registry: Option<Arc<LanguageRegistry>>,
  12) -> ParsedMarkdown {
  13    let options = Options::all();
  14    let parser = Parser::new_ext(markdown_input, options);
  15    let parser = MarkdownParser::new(
  16        parser.into_offset_iter().collect(),
  17        file_location_directory,
  18        language_registry,
  19    );
  20    let renderer = parser.parse_document().await;
  21    ParsedMarkdown {
  22        children: renderer.parsed,
  23    }
  24}
  25
  26struct MarkdownParser<'a> {
  27    tokens: Vec<(Event<'a>, Range<usize>)>,
  28    /// The current index in the tokens array
  29    cursor: usize,
  30    /// The blocks that we have successfully parsed so far
  31    parsed: Vec<ParsedMarkdownElement>,
  32    file_location_directory: Option<PathBuf>,
  33    language_registry: Option<Arc<LanguageRegistry>>,
  34}
  35
  36impl<'a> MarkdownParser<'a> {
  37    fn new(
  38        tokens: Vec<(Event<'a>, Range<usize>)>,
  39        file_location_directory: Option<PathBuf>,
  40        language_registry: Option<Arc<LanguageRegistry>>,
  41    ) -> Self {
  42        Self {
  43            tokens,
  44            file_location_directory,
  45            language_registry,
  46            cursor: 0,
  47            parsed: vec![],
  48        }
  49    }
  50
  51    fn eof(&self) -> bool {
  52        if self.tokens.is_empty() {
  53            return true;
  54        }
  55        self.cursor >= self.tokens.len() - 1
  56    }
  57
  58    fn peek(&self, steps: usize) -> Option<&(Event, Range<usize>)> {
  59        if self.eof() || (steps + self.cursor) >= self.tokens.len() {
  60            return self.tokens.last();
  61        }
  62        return self.tokens.get(self.cursor + steps);
  63    }
  64
  65    fn previous(&self) -> Option<&(Event, Range<usize>)> {
  66        if self.cursor == 0 || self.cursor > self.tokens.len() {
  67            return None;
  68        }
  69        return self.tokens.get(self.cursor - 1);
  70    }
  71
  72    fn current(&self) -> Option<&(Event, Range<usize>)> {
  73        return self.peek(0);
  74    }
  75
  76    fn current_event(&self) -> Option<&Event> {
  77        return self.current().map(|(event, _)| event);
  78    }
  79
  80    fn is_text_like(event: &Event) -> bool {
  81        match event {
  82            Event::Text(_)
  83            // Represent an inline code block
  84            | Event::Code(_)
  85            | Event::Html(_)
  86            | Event::FootnoteReference(_)
  87            | Event::Start(Tag::Link { link_type: _, dest_url: _, title: _, id: _ })
  88            | Event::Start(Tag::Emphasis)
  89            | Event::Start(Tag::Strong)
  90            | Event::Start(Tag::Strikethrough)
  91            | Event::Start(Tag::Image { link_type: _, dest_url: _, title: _, id: _ }) => {
  92                return true;
  93            }
  94            _ => return false,
  95        }
  96    }
  97
  98    async fn parse_document(mut self) -> Self {
  99        while !self.eof() {
 100            if let Some(block) = self.parse_block().await {
 101                self.parsed.push(block);
 102            }
 103        }
 104        self
 105    }
 106
 107    async fn parse_block(&mut self) -> Option<ParsedMarkdownElement> {
 108        let (current, source_range) = self.current().unwrap();
 109        match current {
 110            Event::Start(tag) => match tag {
 111                Tag::Paragraph => {
 112                    self.cursor += 1;
 113                    let text = self.parse_text(false);
 114                    Some(ParsedMarkdownElement::Paragraph(text))
 115                }
 116                Tag::Heading {
 117                    level,
 118                    id: _,
 119                    classes: _,
 120                    attrs: _,
 121                } => {
 122                    let level = *level;
 123                    self.cursor += 1;
 124                    let heading = self.parse_heading(level);
 125                    Some(ParsedMarkdownElement::Heading(heading))
 126                }
 127                Tag::Table(alignment) => {
 128                    let alignment = alignment.clone();
 129                    self.cursor += 1;
 130                    let table = self.parse_table(alignment);
 131                    Some(ParsedMarkdownElement::Table(table))
 132                }
 133                Tag::List(order) => {
 134                    let order = *order;
 135                    self.cursor += 1;
 136                    let list = self.parse_list(1, order).await;
 137                    Some(ParsedMarkdownElement::List(list))
 138                }
 139                Tag::BlockQuote => {
 140                    self.cursor += 1;
 141                    let block_quote = self.parse_block_quote().await;
 142                    Some(ParsedMarkdownElement::BlockQuote(block_quote))
 143                }
 144                Tag::CodeBlock(kind) => {
 145                    let language = match kind {
 146                        pulldown_cmark::CodeBlockKind::Indented => None,
 147                        pulldown_cmark::CodeBlockKind::Fenced(language) => {
 148                            if language.is_empty() {
 149                                None
 150                            } else {
 151                                Some(language.to_string())
 152                            }
 153                        }
 154                    };
 155
 156                    self.cursor += 1;
 157
 158                    let code_block = self.parse_code_block(language).await;
 159                    Some(ParsedMarkdownElement::CodeBlock(code_block))
 160                }
 161                _ => {
 162                    self.cursor += 1;
 163                    None
 164                }
 165            },
 166            Event::Rule => {
 167                let source_range = source_range.clone();
 168                self.cursor += 1;
 169                Some(ParsedMarkdownElement::HorizontalRule(source_range))
 170            }
 171            _ => {
 172                self.cursor += 1;
 173                None
 174            }
 175        }
 176    }
 177
 178    fn parse_text(&mut self, should_complete_on_soft_break: bool) -> ParsedMarkdownText {
 179        let (_current, source_range) = self.previous().unwrap();
 180        let source_range = source_range.clone();
 181
 182        let mut text = String::new();
 183        let mut bold_depth = 0;
 184        let mut italic_depth = 0;
 185        let mut strikethrough_depth = 0;
 186        let mut link: Option<Link> = None;
 187        let mut region_ranges: Vec<Range<usize>> = vec![];
 188        let mut regions: Vec<ParsedRegion> = vec![];
 189        let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
 190
 191        loop {
 192            if self.eof() {
 193                break;
 194            }
 195
 196            let (current, _source_range) = self.current().unwrap();
 197            let prev_len = text.len();
 198            match current {
 199                Event::SoftBreak => {
 200                    if should_complete_on_soft_break {
 201                        break;
 202                    }
 203
 204                    // `Some text\nSome more text` should be treated as a single line.
 205                    text.push(' ');
 206                }
 207
 208                Event::HardBreak => {
 209                    text.push('\n');
 210                }
 211
 212                Event::Text(t) => {
 213                    text.push_str(t.as_ref());
 214
 215                    let mut style = MarkdownHighlightStyle::default();
 216
 217                    if bold_depth > 0 {
 218                        style.weight = FontWeight::BOLD;
 219                    }
 220
 221                    if italic_depth > 0 {
 222                        style.italic = true;
 223                    }
 224
 225                    if strikethrough_depth > 0 {
 226                        style.strikethrough = true;
 227                    }
 228
 229                    if let Some(link) = link.clone() {
 230                        region_ranges.push(prev_len..text.len());
 231                        regions.push(ParsedRegion {
 232                            code: false,
 233                            link: Some(link),
 234                        });
 235                        style.underline = true;
 236                    }
 237
 238                    if style != MarkdownHighlightStyle::default() {
 239                        let mut new_highlight = true;
 240                        if let Some((last_range, MarkdownHighlight::Style(last_style))) =
 241                            highlights.last_mut()
 242                        {
 243                            if last_range.end == prev_len && last_style == &style {
 244                                last_range.end = text.len();
 245                                new_highlight = false;
 246                            }
 247                        }
 248                        if new_highlight {
 249                            let range = prev_len..text.len();
 250                            highlights.push((range, MarkdownHighlight::Style(style)));
 251                        }
 252                    }
 253                }
 254
 255                // Note: This event means "inline code" and not "code block"
 256                Event::Code(t) => {
 257                    text.push_str(t.as_ref());
 258                    region_ranges.push(prev_len..text.len());
 259
 260                    if link.is_some() {
 261                        highlights.push((
 262                            prev_len..text.len(),
 263                            MarkdownHighlight::Style(MarkdownHighlightStyle {
 264                                underline: true,
 265                                ..Default::default()
 266                            }),
 267                        ));
 268                    }
 269
 270                    regions.push(ParsedRegion {
 271                        code: true,
 272                        link: link.clone(),
 273                    });
 274                }
 275
 276                Event::Start(tag) => match tag {
 277                    Tag::Emphasis => italic_depth += 1,
 278                    Tag::Strong => bold_depth += 1,
 279                    Tag::Strikethrough => strikethrough_depth += 1,
 280                    Tag::Link {
 281                        link_type: _,
 282                        dest_url,
 283                        title: _,
 284                        id: _,
 285                    } => {
 286                        link = Link::identify(
 287                            self.file_location_directory.clone(),
 288                            dest_url.to_string(),
 289                        );
 290                    }
 291                    _ => {
 292                        break;
 293                    }
 294                },
 295
 296                Event::End(tag) => match tag {
 297                    TagEnd::Emphasis => {
 298                        italic_depth -= 1;
 299                    }
 300                    TagEnd::Strong => {
 301                        bold_depth -= 1;
 302                    }
 303                    TagEnd::Strikethrough => {
 304                        strikethrough_depth -= 1;
 305                    }
 306                    TagEnd::Link => {
 307                        link = None;
 308                    }
 309                    TagEnd::Paragraph => {
 310                        self.cursor += 1;
 311                        break;
 312                    }
 313                    _ => {
 314                        break;
 315                    }
 316                },
 317
 318                _ => {
 319                    break;
 320                }
 321            }
 322
 323            self.cursor += 1;
 324        }
 325
 326        ParsedMarkdownText {
 327            source_range,
 328            contents: text,
 329            highlights,
 330            regions,
 331            region_ranges,
 332        }
 333    }
 334
 335    fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
 336        let (_event, source_range) = self.previous().unwrap();
 337        let source_range = source_range.clone();
 338        let text = self.parse_text(true);
 339
 340        // Advance past the heading end tag
 341        self.cursor += 1;
 342
 343        ParsedMarkdownHeading {
 344            source_range: source_range.clone(),
 345            level: match level {
 346                pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
 347                pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
 348                pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
 349                pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
 350                pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
 351                pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
 352            },
 353            contents: text,
 354        }
 355    }
 356
 357    fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
 358        let (_event, source_range) = self.previous().unwrap();
 359        let source_range = source_range.clone();
 360        let mut header = ParsedMarkdownTableRow::new();
 361        let mut body = vec![];
 362        let mut current_row = vec![];
 363        let mut in_header = true;
 364        let column_alignments = alignment
 365            .iter()
 366            .map(|a| Self::convert_alignment(a))
 367            .collect();
 368
 369        loop {
 370            if self.eof() {
 371                break;
 372            }
 373
 374            let (current, _source_range) = self.current().unwrap();
 375            match current {
 376                Event::Start(Tag::TableHead)
 377                | Event::Start(Tag::TableRow)
 378                | Event::End(TagEnd::TableCell) => {
 379                    self.cursor += 1;
 380                }
 381                Event::Start(Tag::TableCell) => {
 382                    self.cursor += 1;
 383                    let cell_contents = self.parse_text(false);
 384                    current_row.push(cell_contents);
 385                }
 386                Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
 387                    self.cursor += 1;
 388                    let new_row = std::mem::replace(&mut current_row, vec![]);
 389                    if in_header {
 390                        header.children = new_row;
 391                        in_header = false;
 392                    } else {
 393                        let row = ParsedMarkdownTableRow::with_children(new_row);
 394                        body.push(row);
 395                    }
 396                }
 397                Event::End(TagEnd::Table) => {
 398                    self.cursor += 1;
 399                    break;
 400                }
 401                _ => {
 402                    break;
 403                }
 404            }
 405        }
 406
 407        ParsedMarkdownTable {
 408            source_range,
 409            header,
 410            body,
 411            column_alignments,
 412        }
 413    }
 414
 415    fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
 416        match alignment {
 417            Alignment::None => ParsedMarkdownTableAlignment::None,
 418            Alignment::Left => ParsedMarkdownTableAlignment::Left,
 419            Alignment::Center => ParsedMarkdownTableAlignment::Center,
 420            Alignment::Right => ParsedMarkdownTableAlignment::Right,
 421        }
 422    }
 423
 424    #[async_recursion]
 425    async fn parse_list(&mut self, depth: u16, order: Option<u64>) -> ParsedMarkdownList {
 426        let (_event, source_range) = self.previous().unwrap();
 427        let source_range = source_range.clone();
 428        let mut children = vec![];
 429        let mut inside_list_item = false;
 430        let mut order = order;
 431        let mut task_item = None;
 432
 433        let mut current_list_items: Vec<Box<ParsedMarkdownElement>> = vec![];
 434
 435        while !self.eof() {
 436            let (current, _source_range) = self.current().unwrap();
 437            match current {
 438                Event::Start(Tag::List(order)) => {
 439                    let order = *order;
 440                    self.cursor += 1;
 441
 442                    let inner_list = self.parse_list(depth + 1, order).await;
 443                    let block = ParsedMarkdownElement::List(inner_list);
 444                    current_list_items.push(Box::new(block));
 445                }
 446                Event::End(TagEnd::List(_)) => {
 447                    self.cursor += 1;
 448                    break;
 449                }
 450                Event::Start(Tag::Item) => {
 451                    self.cursor += 1;
 452                    inside_list_item = true;
 453
 454                    // Check for task list marker (`- [ ]` or `- [x]`)
 455                    if let Some(event) = self.current_event() {
 456                        // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
 457                        if event == &Event::Start(Tag::Paragraph) {
 458                            self.cursor += 1;
 459                        }
 460
 461                        if let Some(Event::TaskListMarker(checked)) = self.current_event() {
 462                            task_item = Some(*checked);
 463                            self.cursor += 1;
 464                        }
 465                    }
 466
 467                    if let Some(event) = self.current_event() {
 468                        // This is a plain list item.
 469                        // For example `- some text` or `1. [Docs](./docs.md)`
 470                        if MarkdownParser::is_text_like(event) {
 471                            let text = self.parse_text(false);
 472                            let block = ParsedMarkdownElement::Paragraph(text);
 473                            current_list_items.push(Box::new(block));
 474                        } else {
 475                            let block = self.parse_block().await;
 476                            if let Some(block) = block {
 477                                current_list_items.push(Box::new(block));
 478                            }
 479                        }
 480                    }
 481
 482                    // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
 483                    if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
 484                        self.cursor += 1;
 485                    }
 486                }
 487                Event::End(TagEnd::Item) => {
 488                    self.cursor += 1;
 489
 490                    let item_type = if let Some(checked) = task_item {
 491                        ParsedMarkdownListItemType::Task(checked)
 492                    } else if let Some(order) = order {
 493                        ParsedMarkdownListItemType::Ordered(order)
 494                    } else {
 495                        ParsedMarkdownListItemType::Unordered
 496                    };
 497
 498                    if let Some(current) = order {
 499                        order = Some(current + 1);
 500                    }
 501
 502                    let contents = std::mem::replace(&mut current_list_items, vec![]);
 503
 504                    children.push(ParsedMarkdownListItem {
 505                        contents,
 506                        depth,
 507                        item_type,
 508                    });
 509
 510                    inside_list_item = false;
 511                    task_item = None;
 512                }
 513                _ => {
 514                    if !inside_list_item {
 515                        break;
 516                    }
 517
 518                    let block = self.parse_block().await;
 519                    if let Some(block) = block {
 520                        current_list_items.push(Box::new(block));
 521                    }
 522                }
 523            }
 524        }
 525
 526        ParsedMarkdownList {
 527            source_range,
 528            children,
 529        }
 530    }
 531
 532    #[async_recursion]
 533    async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
 534        let (_event, source_range) = self.previous().unwrap();
 535        let source_range = source_range.clone();
 536        let mut nested_depth = 1;
 537
 538        let mut children: Vec<Box<ParsedMarkdownElement>> = vec![];
 539
 540        while !self.eof() {
 541            let block = self.parse_block().await;
 542
 543            if let Some(block) = block {
 544                children.push(Box::new(block));
 545            } else {
 546                break;
 547            }
 548
 549            if self.eof() {
 550                break;
 551            }
 552
 553            let (current, _source_range) = self.current().unwrap();
 554            match current {
 555                // This is a nested block quote.
 556                // Record that we're in a nested block quote and continue parsing.
 557                // We don't need to advance the cursor since the next
 558                // call to `parse_block` will handle it.
 559                Event::Start(Tag::BlockQuote) => {
 560                    nested_depth += 1;
 561                }
 562                Event::End(TagEnd::BlockQuote) => {
 563                    nested_depth -= 1;
 564                    if nested_depth == 0 {
 565                        self.cursor += 1;
 566                        break;
 567                    }
 568                }
 569                _ => {}
 570            };
 571        }
 572
 573        ParsedMarkdownBlockQuote {
 574            source_range,
 575            children,
 576        }
 577    }
 578
 579    async fn parse_code_block(&mut self, language: Option<String>) -> ParsedMarkdownCodeBlock {
 580        let (_event, source_range) = self.previous().unwrap();
 581        let source_range = source_range.clone();
 582        let mut code = String::new();
 583
 584        while !self.eof() {
 585            let (current, _source_range) = self.current().unwrap();
 586            match current {
 587                Event::Text(text) => {
 588                    code.push_str(&text);
 589                    self.cursor += 1;
 590                }
 591                Event::End(TagEnd::CodeBlock) => {
 592                    self.cursor += 1;
 593                    break;
 594                }
 595                _ => {
 596                    break;
 597                }
 598            }
 599        }
 600
 601        let highlights = if let Some(language) = &language {
 602            if let Some(registry) = &self.language_registry {
 603                let rope: language::Rope = code.as_str().into();
 604                registry
 605                    .language_for_name_or_extension(language)
 606                    .await
 607                    .map(|l| l.highlight_text(&rope, 0..code.len()))
 608                    .ok()
 609            } else {
 610                None
 611            }
 612        } else {
 613            None
 614        };
 615
 616        ParsedMarkdownCodeBlock {
 617            source_range,
 618            contents: code.trim().to_string().into(),
 619            language,
 620            highlights,
 621        }
 622    }
 623}
 624
 625#[cfg(test)]
 626mod tests {
 627    use super::*;
 628
 629    use gpui::BackgroundExecutor;
 630    use language::{tree_sitter_rust, HighlightId, Language, LanguageConfig, LanguageMatcher};
 631    use pretty_assertions::assert_eq;
 632
 633    use ParsedMarkdownElement::*;
 634    use ParsedMarkdownListItemType::*;
 635
 636    async fn parse(input: &str) -> ParsedMarkdown {
 637        parse_markdown(input, None, None).await
 638    }
 639
 640    #[gpui::test]
 641    async fn test_headings() {
 642        let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
 643
 644        assert_eq!(
 645            parsed.children,
 646            vec![
 647                h1(text("Heading one", 0..14), 0..14),
 648                h2(text("Heading two", 14..29), 14..29),
 649                h3(text("Heading three", 29..46), 29..46),
 650            ]
 651        );
 652    }
 653
 654    #[gpui::test]
 655    async fn test_newlines_dont_new_paragraphs() {
 656        let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
 657
 658        assert_eq!(
 659            parsed.children,
 660            vec![p("Some text that is bolded and italicized", 0..46)]
 661        );
 662    }
 663
 664    #[gpui::test]
 665    async fn test_heading_with_paragraph() {
 666        let parsed = parse("# Zed\nThe editor").await;
 667
 668        assert_eq!(
 669            parsed.children,
 670            vec![h1(text("Zed", 0..6), 0..6), p("The editor", 6..16),]
 671        );
 672    }
 673
 674    #[gpui::test]
 675    async fn test_double_newlines_do_new_paragraphs() {
 676        let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
 677
 678        assert_eq!(
 679            parsed.children,
 680            vec![
 681                p("Some text that is bolded", 0..29),
 682                p("and italicized", 31..47),
 683            ]
 684        );
 685    }
 686
 687    #[gpui::test]
 688    async fn test_bold_italic_text() {
 689        let parsed = parse("Some text **that is bolded** and *italicized*").await;
 690
 691        assert_eq!(
 692            parsed.children,
 693            vec![p("Some text that is bolded and italicized", 0..45)]
 694        );
 695    }
 696
 697    #[gpui::test]
 698    async fn test_nested_bold_strikethrough_text() {
 699        let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
 700
 701        assert_eq!(parsed.children.len(), 1);
 702        assert_eq!(
 703            parsed.children[0],
 704            ParsedMarkdownElement::Paragraph(ParsedMarkdownText {
 705                source_range: 0..35,
 706                contents: "Some bostrikethroughld text".to_string(),
 707                highlights: Vec::new(),
 708                region_ranges: Vec::new(),
 709                regions: Vec::new(),
 710            })
 711        );
 712
 713        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 714            text
 715        } else {
 716            panic!("Expected a paragraph");
 717        };
 718        assert_eq!(
 719            paragraph.highlights,
 720            vec![
 721                (
 722                    5..7,
 723                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 724                        weight: FontWeight::BOLD,
 725                        ..Default::default()
 726                    }),
 727                ),
 728                (
 729                    7..20,
 730                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 731                        weight: FontWeight::BOLD,
 732                        strikethrough: true,
 733                        ..Default::default()
 734                    }),
 735                ),
 736                (
 737                    20..22,
 738                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 739                        weight: FontWeight::BOLD,
 740                        ..Default::default()
 741                    }),
 742                ),
 743            ]
 744        );
 745    }
 746
 747    #[gpui::test]
 748    async fn test_header_only_table() {
 749        let markdown = "\
 750| Header 1 | Header 2 |
 751|----------|----------|
 752
 753Some other content
 754";
 755
 756        let expected_table = table(
 757            0..48,
 758            row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
 759            vec![],
 760        );
 761
 762        assert_eq!(
 763            parse(markdown).await.children[0],
 764            ParsedMarkdownElement::Table(expected_table)
 765        );
 766    }
 767
 768    #[gpui::test]
 769    async fn test_basic_table() {
 770        let markdown = "\
 771| Header 1 | Header 2 |
 772|----------|----------|
 773| Cell 1   | Cell 2   |
 774| Cell 3   | Cell 4   |";
 775
 776        let expected_table = table(
 777            0..95,
 778            row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
 779            vec![
 780                row(vec![text("Cell 1", 49..59), text("Cell 2", 60..70)]),
 781                row(vec![text("Cell 3", 73..83), text("Cell 4", 84..94)]),
 782            ],
 783        );
 784
 785        assert_eq!(
 786            parse(markdown).await.children[0],
 787            ParsedMarkdownElement::Table(expected_table)
 788        );
 789    }
 790
 791    #[gpui::test]
 792    async fn test_list_basic() {
 793        let parsed = parse(
 794            "\
 795* Item 1
 796* Item 2
 797* Item 3
 798",
 799        )
 800        .await;
 801
 802        assert_eq!(
 803            parsed.children,
 804            vec![list(
 805                vec![
 806                    list_item(1, Unordered, vec![p("Item 1", 0..9)]),
 807                    list_item(1, Unordered, vec![p("Item 2", 9..18)]),
 808                    list_item(1, Unordered, vec![p("Item 3", 18..27)]),
 809                ],
 810                0..27
 811            ),]
 812        );
 813    }
 814
 815    #[gpui::test]
 816    async fn test_list_with_tasks() {
 817        let parsed = parse(
 818            "\
 819- [ ] TODO
 820- [x] Checked
 821",
 822        )
 823        .await;
 824
 825        assert_eq!(
 826            parsed.children,
 827            vec![list(
 828                vec![
 829                    list_item(1, Task(false), vec![p("TODO", 2..5)]),
 830                    list_item(1, Task(true), vec![p("Checked", 13..16)]),
 831                ],
 832                0..25
 833            ),]
 834        );
 835    }
 836
 837    #[gpui::test]
 838    async fn test_list_with_linebreak_is_handled_correctly() {
 839        let parsed = parse(
 840            "\
 841- [ ] Task 1
 842
 843- [x] Task 2
 844",
 845        )
 846        .await;
 847
 848        assert_eq!(
 849            parsed.children,
 850            vec![list(
 851                vec![
 852                    list_item(1, Task(false), vec![p("Task 1", 2..5)]),
 853                    list_item(1, Task(true), vec![p("Task 2", 16..19)]),
 854                ],
 855                0..27
 856            ),]
 857        );
 858    }
 859
 860    #[gpui::test]
 861    async fn test_list_nested() {
 862        let parsed = parse(
 863            "\
 864* Item 1
 865* Item 2
 866* Item 3
 867
 8681. Hello
 8691. Two
 870   1. Three
 8712. Four
 8723. Five
 873
 874* First
 875  1. Hello
 876     1. Goodbyte
 877        - Inner
 878        - Inner
 879  2. Goodbyte
 880* Last
 881",
 882        )
 883        .await;
 884
 885        assert_eq!(
 886            parsed.children,
 887            vec![
 888                list(
 889                    vec![
 890                        list_item(1, Unordered, vec![p("Item 1", 0..9)]),
 891                        list_item(1, Unordered, vec![p("Item 2", 9..18)]),
 892                        list_item(1, Unordered, vec![p("Item 3", 18..28)]),
 893                    ],
 894                    0..28
 895                ),
 896                list(
 897                    vec![
 898                        list_item(1, Ordered(1), vec![p("Hello", 28..37)]),
 899                        list_item(
 900                            1,
 901                            Ordered(2),
 902                            vec![
 903                                p("Two", 37..56),
 904                                list(
 905                                    vec![list_item(2, Ordered(1), vec![p("Three", 47..56)]),],
 906                                    47..56
 907                                ),
 908                            ]
 909                        ),
 910                        list_item(1, Ordered(3), vec![p("Four", 56..64)]),
 911                        list_item(1, Ordered(4), vec![p("Five", 64..73)]),
 912                    ],
 913                    28..73
 914                ),
 915                list(
 916                    vec![
 917                        list_item(
 918                            1,
 919                            Unordered,
 920                            vec![
 921                                p("First", 73..155),
 922                                list(
 923                                    vec![
 924                                        list_item(
 925                                            2,
 926                                            Ordered(1),
 927                                            vec![
 928                                                p("Hello", 83..141),
 929                                                list(
 930                                                    vec![list_item(
 931                                                        3,
 932                                                        Ordered(1),
 933                                                        vec![
 934                                                            p("Goodbyte", 97..141),
 935                                                            list(
 936                                                                vec![
 937                                                                    list_item(
 938                                                                        4,
 939                                                                        Unordered,
 940                                                                        vec![p("Inner", 117..125)]
 941                                                                    ),
 942                                                                    list_item(
 943                                                                        4,
 944                                                                        Unordered,
 945                                                                        vec![p("Inner", 133..141)]
 946                                                                    ),
 947                                                                ],
 948                                                                117..141
 949                                                            )
 950                                                        ]
 951                                                    ),],
 952                                                    97..141
 953                                                )
 954                                            ]
 955                                        ),
 956                                        list_item(2, Ordered(2), vec![p("Goodbyte", 143..155)]),
 957                                    ],
 958                                    83..155
 959                                )
 960                            ]
 961                        ),
 962                        list_item(1, Unordered, vec![p("Last", 155..162)]),
 963                    ],
 964                    73..162
 965                ),
 966            ]
 967        );
 968    }
 969
 970    #[gpui::test]
 971    async fn test_list_with_nested_content() {
 972        let parsed = parse(
 973            "\
 974*   This is a list item with two paragraphs.
 975
 976    This is the second paragraph in the list item.",
 977        )
 978        .await;
 979
 980        assert_eq!(
 981            parsed.children,
 982            vec![list(
 983                vec![list_item(
 984                    1,
 985                    Unordered,
 986                    vec![
 987                        p("This is a list item with two paragraphs.", 4..45),
 988                        p("This is the second paragraph in the list item.", 50..96)
 989                    ],
 990                ),],
 991                0..96,
 992            ),]
 993        );
 994    }
 995
 996    #[gpui::test]
 997    async fn test_list_with_leading_text() {
 998        let parsed = parse(
 999            "\
1000* `code`
1001* **bold**
1002* [link](https://example.com)
1003",
1004        )
1005        .await;
1006
1007        assert_eq!(
1008            parsed.children,
1009            vec![list(
1010                vec![
1011                    list_item(1, Unordered, vec![p("code", 0..9)],),
1012                    list_item(1, Unordered, vec![p("bold", 9..20)]),
1013                    list_item(1, Unordered, vec![p("link", 20..50)],)
1014                ],
1015                0..50,
1016            ),]
1017        );
1018    }
1019
1020    #[gpui::test]
1021    async fn test_simple_block_quote() {
1022        let parsed = parse("> Simple block quote with **styled text**").await;
1023
1024        assert_eq!(
1025            parsed.children,
1026            vec![block_quote(
1027                vec![p("Simple block quote with styled text", 2..41)],
1028                0..41
1029            )]
1030        );
1031    }
1032
1033    #[gpui::test]
1034    async fn test_simple_block_quote_with_multiple_lines() {
1035        let parsed = parse(
1036            "\
1037> # Heading
1038> More
1039> text
1040>
1041> More text
1042",
1043        )
1044        .await;
1045
1046        assert_eq!(
1047            parsed.children,
1048            vec![block_quote(
1049                vec![
1050                    h1(text("Heading", 2..12), 2..12),
1051                    p("More text", 14..26),
1052                    p("More text", 30..40)
1053                ],
1054                0..40
1055            )]
1056        );
1057    }
1058
1059    #[gpui::test]
1060    async fn test_nested_block_quote() {
1061        let parsed = parse(
1062            "\
1063> A
1064>
1065> > # B
1066>
1067> C
1068
1069More text
1070",
1071        )
1072        .await;
1073
1074        assert_eq!(
1075            parsed.children,
1076            vec![
1077                block_quote(
1078                    vec![
1079                        p("A", 2..4),
1080                        block_quote(vec![h1(text("B", 10..14), 10..14)], 8..14),
1081                        p("C", 18..20)
1082                    ],
1083                    0..20
1084                ),
1085                p("More text", 21..31)
1086            ]
1087        );
1088    }
1089
1090    #[gpui::test]
1091    async fn test_code_block() {
1092        let parsed = parse(
1093            "\
1094```
1095fn main() {
1096    return 0;
1097}
1098```
1099",
1100        )
1101        .await;
1102
1103        assert_eq!(
1104            parsed.children,
1105            vec![code_block(
1106                None,
1107                "fn main() {\n    return 0;\n}",
1108                0..35,
1109                None
1110            )]
1111        );
1112    }
1113
1114    #[gpui::test]
1115    async fn test_code_block_with_language(executor: BackgroundExecutor) {
1116        let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
1117        language_registry.add(rust_lang());
1118
1119        let parsed = parse_markdown(
1120            "\
1121```rust
1122fn main() {
1123    return 0;
1124}
1125```
1126",
1127            None,
1128            Some(language_registry),
1129        )
1130        .await;
1131
1132        assert_eq!(
1133            parsed.children,
1134            vec![code_block(
1135                Some("rust".to_string()),
1136                "fn main() {\n    return 0;\n}",
1137                0..39,
1138                Some(vec![])
1139            )]
1140        );
1141    }
1142
1143    fn rust_lang() -> Arc<Language> {
1144        Arc::new(Language::new(
1145            LanguageConfig {
1146                name: "Rust".into(),
1147                matcher: LanguageMatcher {
1148                    path_suffixes: vec!["rs".into()],
1149                    ..Default::default()
1150                },
1151                collapsed_placeholder: " /* ... */ ".to_string(),
1152                ..Default::default()
1153            },
1154            Some(tree_sitter_rust::language()),
1155        ))
1156    }
1157
1158    fn h1(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1159        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1160            source_range,
1161            level: HeadingLevel::H1,
1162            contents,
1163        })
1164    }
1165
1166    fn h2(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1167        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1168            source_range,
1169            level: HeadingLevel::H2,
1170            contents,
1171        })
1172    }
1173
1174    fn h3(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1175        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1176            source_range,
1177            level: HeadingLevel::H3,
1178            contents,
1179        })
1180    }
1181
1182    fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
1183        ParsedMarkdownElement::Paragraph(text(contents, source_range))
1184    }
1185
1186    fn text(contents: &str, source_range: Range<usize>) -> ParsedMarkdownText {
1187        ParsedMarkdownText {
1188            highlights: Vec::new(),
1189            region_ranges: Vec::new(),
1190            regions: Vec::new(),
1191            source_range,
1192            contents: contents.to_string(),
1193        }
1194    }
1195
1196    fn block_quote(
1197        children: Vec<ParsedMarkdownElement>,
1198        source_range: Range<usize>,
1199    ) -> ParsedMarkdownElement {
1200        ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
1201            source_range,
1202            children: children.into_iter().map(Box::new).collect(),
1203        })
1204    }
1205
1206    fn code_block(
1207        language: Option<String>,
1208        code: &str,
1209        source_range: Range<usize>,
1210        highlights: Option<Vec<(Range<usize>, HighlightId)>>,
1211    ) -> ParsedMarkdownElement {
1212        ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
1213            source_range,
1214            language,
1215            contents: code.to_string().into(),
1216            highlights,
1217        })
1218    }
1219
1220    fn list(
1221        children: Vec<ParsedMarkdownListItem>,
1222        source_range: Range<usize>,
1223    ) -> ParsedMarkdownElement {
1224        List(ParsedMarkdownList {
1225            source_range,
1226            children,
1227        })
1228    }
1229
1230    fn list_item(
1231        depth: u16,
1232        item_type: ParsedMarkdownListItemType,
1233        contents: Vec<ParsedMarkdownElement>,
1234    ) -> ParsedMarkdownListItem {
1235        ParsedMarkdownListItem {
1236            item_type,
1237            depth,
1238            contents: contents.into_iter().map(Box::new).collect(),
1239        }
1240    }
1241
1242    fn table(
1243        source_range: Range<usize>,
1244        header: ParsedMarkdownTableRow,
1245        body: Vec<ParsedMarkdownTableRow>,
1246    ) -> ParsedMarkdownTable {
1247        ParsedMarkdownTable {
1248            column_alignments: Vec::new(),
1249            source_range,
1250            header,
1251            body,
1252        }
1253    }
1254
1255    fn row(children: Vec<ParsedMarkdownText>) -> ParsedMarkdownTableRow {
1256        ParsedMarkdownTableRow { children }
1257    }
1258
1259    impl PartialEq for ParsedMarkdownTable {
1260        fn eq(&self, other: &Self) -> bool {
1261            self.source_range == other.source_range
1262                && self.header == other.header
1263                && self.body == other.body
1264        }
1265    }
1266
1267    impl PartialEq for ParsedMarkdownText {
1268        fn eq(&self, other: &Self) -> bool {
1269            self.source_range == other.source_range && self.contents == other.contents
1270        }
1271    }
1272}