markdown_parser.rs

   1use crate::markdown_elements::*;
   2use async_recursion::async_recursion;
   3use gpui::FontWeight;
   4use language::LanguageRegistry;
   5use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
   6use std::{ops::Range, path::PathBuf, sync::Arc};
   7
   8pub async fn parse_markdown(
   9    markdown_input: &str,
  10    file_location_directory: Option<PathBuf>,
  11    language_registry: Option<Arc<LanguageRegistry>>,
  12) -> ParsedMarkdown {
  13    let options = Options::all();
  14    let parser = Parser::new_ext(markdown_input, options);
  15    let parser = MarkdownParser::new(
  16        parser.into_offset_iter().collect(),
  17        file_location_directory,
  18        language_registry,
  19    );
  20    let renderer = parser.parse_document().await;
  21    ParsedMarkdown {
  22        children: renderer.parsed,
  23    }
  24}
  25
  26struct MarkdownParser<'a> {
  27    tokens: Vec<(Event<'a>, Range<usize>)>,
  28    /// The current index in the tokens array
  29    cursor: usize,
  30    /// The blocks that we have successfully parsed so far
  31    parsed: Vec<ParsedMarkdownElement>,
  32    file_location_directory: Option<PathBuf>,
  33    language_registry: Option<Arc<LanguageRegistry>>,
  34}
  35
  36impl<'a> MarkdownParser<'a> {
  37    fn new(
  38        tokens: Vec<(Event<'a>, Range<usize>)>,
  39        file_location_directory: Option<PathBuf>,
  40        language_registry: Option<Arc<LanguageRegistry>>,
  41    ) -> Self {
  42        Self {
  43            tokens,
  44            file_location_directory,
  45            language_registry,
  46            cursor: 0,
  47            parsed: vec![],
  48        }
  49    }
  50
  51    fn eof(&self) -> bool {
  52        if self.tokens.is_empty() {
  53            return true;
  54        }
  55        self.cursor >= self.tokens.len() - 1
  56    }
  57
  58    fn peek(&self, steps: usize) -> Option<&(Event, Range<usize>)> {
  59        if self.eof() || (steps + self.cursor) >= self.tokens.len() {
  60            return self.tokens.last();
  61        }
  62        return self.tokens.get(self.cursor + steps);
  63    }
  64
  65    fn previous(&self) -> Option<&(Event, Range<usize>)> {
  66        if self.cursor == 0 || self.cursor > self.tokens.len() {
  67            return None;
  68        }
  69        return self.tokens.get(self.cursor - 1);
  70    }
  71
  72    fn current(&self) -> Option<&(Event, Range<usize>)> {
  73        return self.peek(0);
  74    }
  75
  76    fn current_event(&self) -> Option<&Event> {
  77        return self.current().map(|(event, _)| event);
  78    }
  79
  80    fn is_text_like(event: &Event) -> bool {
  81        match event {
  82            Event::Text(_)
  83            // Represent an inline code block
  84            | Event::Code(_)
  85            | Event::Html(_)
  86            | Event::FootnoteReference(_)
  87            | Event::Start(Tag::Link { link_type: _, dest_url: _, title: _, id: _ })
  88            | Event::Start(Tag::Emphasis)
  89            | Event::Start(Tag::Strong)
  90            | Event::Start(Tag::Strikethrough)
  91            | Event::Start(Tag::Image { link_type: _, dest_url: _, title: _, id: _ }) => {
  92                return true;
  93            }
  94            _ => return false,
  95        }
  96    }
  97
  98    async fn parse_document(mut self) -> Self {
  99        while !self.eof() {
 100            if let Some(block) = self.parse_block().await {
 101                self.parsed.push(block);
 102            }
 103        }
 104        self
 105    }
 106
 107    async fn parse_block(&mut self) -> Option<ParsedMarkdownElement> {
 108        let (current, source_range) = self.current().unwrap();
 109        match current {
 110            Event::Start(tag) => match tag {
 111                Tag::Paragraph => {
 112                    self.cursor += 1;
 113                    let text = self.parse_text(false);
 114                    Some(ParsedMarkdownElement::Paragraph(text))
 115                }
 116                Tag::Heading {
 117                    level,
 118                    id: _,
 119                    classes: _,
 120                    attrs: _,
 121                } => {
 122                    let level = *level;
 123                    self.cursor += 1;
 124                    let heading = self.parse_heading(level);
 125                    Some(ParsedMarkdownElement::Heading(heading))
 126                }
 127                Tag::Table(alignment) => {
 128                    let alignment = alignment.clone();
 129                    self.cursor += 1;
 130                    let table = self.parse_table(alignment);
 131                    Some(ParsedMarkdownElement::Table(table))
 132                }
 133                Tag::List(order) => {
 134                    let order = *order;
 135                    self.cursor += 1;
 136                    let list = self.parse_list(1, order).await;
 137                    Some(ParsedMarkdownElement::List(list))
 138                }
 139                Tag::BlockQuote => {
 140                    self.cursor += 1;
 141                    let block_quote = self.parse_block_quote().await;
 142                    Some(ParsedMarkdownElement::BlockQuote(block_quote))
 143                }
 144                Tag::CodeBlock(kind) => {
 145                    let language = match kind {
 146                        pulldown_cmark::CodeBlockKind::Indented => None,
 147                        pulldown_cmark::CodeBlockKind::Fenced(language) => {
 148                            if language.is_empty() {
 149                                None
 150                            } else {
 151                                Some(language.to_string())
 152                            }
 153                        }
 154                    };
 155
 156                    self.cursor += 1;
 157
 158                    let code_block = self.parse_code_block(language).await;
 159                    Some(ParsedMarkdownElement::CodeBlock(code_block))
 160                }
 161                _ => {
 162                    self.cursor += 1;
 163                    None
 164                }
 165            },
 166            Event::Rule => {
 167                let source_range = source_range.clone();
 168                self.cursor += 1;
 169                Some(ParsedMarkdownElement::HorizontalRule(source_range))
 170            }
 171            _ => {
 172                self.cursor += 1;
 173                None
 174            }
 175        }
 176    }
 177
 178    fn parse_text(&mut self, should_complete_on_soft_break: bool) -> ParsedMarkdownText {
 179        let (_current, source_range) = self.previous().unwrap();
 180        let source_range = source_range.clone();
 181
 182        let mut text = String::new();
 183        let mut bold_depth = 0;
 184        let mut italic_depth = 0;
 185        let mut strikethrough_depth = 0;
 186        let mut link: Option<Link> = None;
 187        let mut region_ranges: Vec<Range<usize>> = vec![];
 188        let mut regions: Vec<ParsedRegion> = vec![];
 189        let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
 190
 191        let mut link_urls: Vec<String> = vec![];
 192        let mut link_ranges: Vec<Range<usize>> = vec![];
 193
 194        loop {
 195            if self.eof() {
 196                break;
 197            }
 198
 199            let (current, _source_range) = self.current().unwrap();
 200            let prev_len = text.len();
 201            match current {
 202                Event::SoftBreak => {
 203                    if should_complete_on_soft_break {
 204                        break;
 205                    }
 206
 207                    // `Some text\nSome more text` should be treated as a single line.
 208                    text.push(' ');
 209                }
 210
 211                Event::HardBreak => {
 212                    text.push('\n');
 213                }
 214
 215                Event::Text(t) => {
 216                    text.push_str(t.as_ref());
 217
 218                    let mut style = MarkdownHighlightStyle::default();
 219
 220                    if bold_depth > 0 {
 221                        style.weight = FontWeight::BOLD;
 222                    }
 223
 224                    if italic_depth > 0 {
 225                        style.italic = true;
 226                    }
 227
 228                    if strikethrough_depth > 0 {
 229                        style.strikethrough = true;
 230                    }
 231
 232                    let last_run_len = if let Some(link) = link.clone() {
 233                        region_ranges.push(prev_len..text.len());
 234                        regions.push(ParsedRegion {
 235                            code: false,
 236                            link: Some(link),
 237                        });
 238                        style.underline = true;
 239                        prev_len
 240                    } else {
 241                        // Manually scan for links
 242                        let mut finder = linkify::LinkFinder::new();
 243                        finder.kinds(&[linkify::LinkKind::Url]);
 244                        let mut last_link_len = prev_len;
 245                        for link in finder.links(&t) {
 246                            let start = link.start();
 247                            let end = link.end();
 248                            let range = (prev_len + start)..(prev_len + end);
 249                            link_ranges.push(range.clone());
 250                            link_urls.push(link.as_str().to_string());
 251
 252                            // If there is a style before we match a link, we have to add this to the highlighted ranges
 253                            if style != MarkdownHighlightStyle::default()
 254                                && last_link_len < link.start()
 255                            {
 256                                highlights.push((
 257                                    last_link_len..link.start(),
 258                                    MarkdownHighlight::Style(style.clone()),
 259                                ));
 260                            }
 261
 262                            highlights.push((
 263                                range.clone(),
 264                                MarkdownHighlight::Style(MarkdownHighlightStyle {
 265                                    underline: true,
 266                                    ..style
 267                                }),
 268                            ));
 269                            region_ranges.push(range.clone());
 270                            regions.push(ParsedRegion {
 271                                code: false,
 272                                link: Some(Link::Web {
 273                                    url: link.as_str().to_string(),
 274                                }),
 275                            });
 276
 277                            last_link_len = end;
 278                        }
 279                        last_link_len
 280                    };
 281
 282                    if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
 283                        let mut new_highlight = true;
 284                        if let Some((last_range, last_style)) = highlights.last_mut() {
 285                            if last_range.end == last_run_len
 286                                && last_style == &MarkdownHighlight::Style(style.clone())
 287                            {
 288                                last_range.end = text.len();
 289                                new_highlight = false;
 290                            }
 291                        }
 292                        if new_highlight {
 293                            highlights
 294                                .push((last_run_len..text.len(), MarkdownHighlight::Style(style)));
 295                        }
 296                    }
 297                }
 298
 299                // Note: This event means "inline code" and not "code block"
 300                Event::Code(t) => {
 301                    text.push_str(t.as_ref());
 302                    region_ranges.push(prev_len..text.len());
 303
 304                    if link.is_some() {
 305                        highlights.push((
 306                            prev_len..text.len(),
 307                            MarkdownHighlight::Style(MarkdownHighlightStyle {
 308                                underline: true,
 309                                ..Default::default()
 310                            }),
 311                        ));
 312                    }
 313
 314                    regions.push(ParsedRegion {
 315                        code: true,
 316                        link: link.clone(),
 317                    });
 318                }
 319
 320                Event::Start(tag) => match tag {
 321                    Tag::Emphasis => italic_depth += 1,
 322                    Tag::Strong => bold_depth += 1,
 323                    Tag::Strikethrough => strikethrough_depth += 1,
 324                    Tag::Link {
 325                        link_type: _,
 326                        dest_url,
 327                        title: _,
 328                        id: _,
 329                    } => {
 330                        link = Link::identify(
 331                            self.file_location_directory.clone(),
 332                            dest_url.to_string(),
 333                        );
 334                    }
 335                    _ => {
 336                        break;
 337                    }
 338                },
 339
 340                Event::End(tag) => match tag {
 341                    TagEnd::Emphasis => {
 342                        italic_depth -= 1;
 343                    }
 344                    TagEnd::Strong => {
 345                        bold_depth -= 1;
 346                    }
 347                    TagEnd::Strikethrough => {
 348                        strikethrough_depth -= 1;
 349                    }
 350                    TagEnd::Link => {
 351                        link = None;
 352                    }
 353                    TagEnd::Paragraph => {
 354                        self.cursor += 1;
 355                        break;
 356                    }
 357                    _ => {
 358                        break;
 359                    }
 360                },
 361
 362                _ => {
 363                    break;
 364                }
 365            }
 366
 367            self.cursor += 1;
 368        }
 369
 370        ParsedMarkdownText {
 371            source_range,
 372            contents: text,
 373            highlights,
 374            regions,
 375            region_ranges,
 376        }
 377    }
 378
 379    fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
 380        let (_event, source_range) = self.previous().unwrap();
 381        let source_range = source_range.clone();
 382        let text = self.parse_text(true);
 383
 384        // Advance past the heading end tag
 385        self.cursor += 1;
 386
 387        ParsedMarkdownHeading {
 388            source_range: source_range.clone(),
 389            level: match level {
 390                pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
 391                pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
 392                pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
 393                pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
 394                pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
 395                pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
 396            },
 397            contents: text,
 398        }
 399    }
 400
 401    fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
 402        let (_event, source_range) = self.previous().unwrap();
 403        let source_range = source_range.clone();
 404        let mut header = ParsedMarkdownTableRow::new();
 405        let mut body = vec![];
 406        let mut current_row = vec![];
 407        let mut in_header = true;
 408        let column_alignments = alignment
 409            .iter()
 410            .map(|a| Self::convert_alignment(a))
 411            .collect();
 412
 413        loop {
 414            if self.eof() {
 415                break;
 416            }
 417
 418            let (current, _source_range) = self.current().unwrap();
 419            match current {
 420                Event::Start(Tag::TableHead)
 421                | Event::Start(Tag::TableRow)
 422                | Event::End(TagEnd::TableCell) => {
 423                    self.cursor += 1;
 424                }
 425                Event::Start(Tag::TableCell) => {
 426                    self.cursor += 1;
 427                    let cell_contents = self.parse_text(false);
 428                    current_row.push(cell_contents);
 429                }
 430                Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
 431                    self.cursor += 1;
 432                    let new_row = std::mem::replace(&mut current_row, vec![]);
 433                    if in_header {
 434                        header.children = new_row;
 435                        in_header = false;
 436                    } else {
 437                        let row = ParsedMarkdownTableRow::with_children(new_row);
 438                        body.push(row);
 439                    }
 440                }
 441                Event::End(TagEnd::Table) => {
 442                    self.cursor += 1;
 443                    break;
 444                }
 445                _ => {
 446                    break;
 447                }
 448            }
 449        }
 450
 451        ParsedMarkdownTable {
 452            source_range,
 453            header,
 454            body,
 455            column_alignments,
 456        }
 457    }
 458
 459    fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
 460        match alignment {
 461            Alignment::None => ParsedMarkdownTableAlignment::None,
 462            Alignment::Left => ParsedMarkdownTableAlignment::Left,
 463            Alignment::Center => ParsedMarkdownTableAlignment::Center,
 464            Alignment::Right => ParsedMarkdownTableAlignment::Right,
 465        }
 466    }
 467
    /// Parses a list whose start tag was just stepped over.
    ///
    /// * `depth` is stored on every item of this list; nested lists are parsed
    ///   recursively with `depth + 1`.
    /// * `order` is the number given to the first item of an ordered list and is
    ///   incremented after each item; `None` produces unordered items. An item
    ///   that carries a task list marker becomes a task item instead.
    ///
    /// Parsing ends after the list end tag is consumed, at end-of-input, or when
    /// an unexpected event is found outside of a list item.
    ///
    /// # Panics
    ///
    /// Panics if there is no previous token to take the source range from.
    #[async_recursion]
    async fn parse_list(&mut self, depth: u16, order: Option<u64>) -> ParsedMarkdownList {
        let (_event, source_range) = self.previous().unwrap();
        let source_range = source_range.clone();
        let mut children = vec![];
        let mut inside_list_item = false;
        let mut order = order;
        // Set when the current item starts with a task list marker:
        // (checked, source range of the marker).
        let mut task_item = None;

        // Blocks collected for the item currently being parsed.
        let mut current_list_items: Vec<Box<ParsedMarkdownElement>> = vec![];

        while !self.eof() {
            let (current, _source_range) = self.current().unwrap();
            match current {
                // A nested list: `order` here shadows the outer counter and is
                // the nested list's own starting number.
                Event::Start(Tag::List(order)) => {
                    let order = *order;
                    self.cursor += 1;

                    let inner_list = self.parse_list(depth + 1, order).await;
                    let block = ParsedMarkdownElement::List(inner_list);
                    current_list_items.push(Box::new(block));
                }
                Event::End(TagEnd::List(_)) => {
                    self.cursor += 1;
                    break;
                }
                Event::Start(Tag::Item) => {
                    self.cursor += 1;
                    inside_list_item = true;

                    // Check for task list marker (`- [ ]` or `- [x]`)
                    if let Some(event) = self.current_event() {
                        // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
                        if event == &Event::Start(Tag::Paragraph) {
                            self.cursor += 1;
                        }

                        if let Some((Event::TaskListMarker(checked), range)) = self.current() {
                            task_item = Some((*checked, range.clone()));
                            self.cursor += 1;
                        }
                    }

                    if let Some(event) = self.current_event() {
                        // This is a plain list item.
                        // For example `- some text` or `1. [Docs](./docs.md)`
                        if MarkdownParser::is_text_like(event) {
                            let text = self.parse_text(false);
                            let block = ParsedMarkdownElement::Paragraph(text);
                            current_list_items.push(Box::new(block));
                        } else {
                            // Anything else is handled as a regular block.
                            let block = self.parse_block().await;
                            if let Some(block) = block {
                                current_list_items.push(Box::new(block));
                            }
                        }
                    }

                    // If a paragraph end tag is still pending at this point
                    // (the item's content was wrapped in a paragraph), skip it.
                    if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
                        self.cursor += 1;
                    }
                }
                Event::End(TagEnd::Item) => {
                    self.cursor += 1;

                    // A task marker takes precedence over ordered/unordered.
                    let item_type = if let Some((checked, range)) = task_item {
                        ParsedMarkdownListItemType::Task(checked, range)
                    } else if let Some(order) = order {
                        ParsedMarkdownListItemType::Ordered(order)
                    } else {
                        ParsedMarkdownListItemType::Unordered
                    };

                    // Ordered lists count upwards for every item, task items included.
                    if let Some(current) = order {
                        order = Some(current + 1);
                    }

                    let contents = std::mem::replace(&mut current_list_items, vec![]);

                    children.push(ParsedMarkdownListItem {
                        contents,
                        depth,
                        item_type,
                    });

                    inside_list_item = false;
                    task_item = None;
                }
                _ => {
                    // Outside of an item an unexpected event ends the list
                    // without being consumed.
                    if !inside_list_item {
                        break;
                    }

                    // Inside an item, further blocks belong to that item.
                    let block = self.parse_block().await;
                    if let Some(block) = block {
                        current_list_items.push(Box::new(block));
                    }
                }
            }
        }

        ParsedMarkdownList {
            source_range,
            children,
        }
    }
 575
    /// Parses a block quote whose start tag was just stepped over.
    ///
    /// Child blocks are read with `parse_block` until it yields no element,
    /// end-of-input is reached, or a block quote end tag brings `nested_depth`
    /// back to zero (in which case that end tag is consumed).
    ///
    /// # Panics
    ///
    /// Panics if there is no previous token to take the source range from.
    #[async_recursion]
    async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
        let (_event, source_range) = self.previous().unwrap();
        let source_range = source_range.clone();
        // Starts at 1 for the quote whose start tag the caller stepped over.
        let mut nested_depth = 1;

        let mut children: Vec<Box<ParsedMarkdownElement>> = vec![];

        while !self.eof() {
            let block = self.parse_block().await;

            if let Some(block) = block {
                children.push(Box::new(block));
            } else {
                // `parse_block` skipped an event that produces no element;
                // that ends this quote.
                break;
            }

            if self.eof() {
                break;
            }

            let (current, _source_range) = self.current().unwrap();
            match current {
                // This is a nested block quote.
                // Record that we're in a nested block quote and continue parsing.
                // We don't need to advance the cursor since the next
                // call to `parse_block` will handle it.
                // NOTE(review): that `parse_block` call recurses into
                // `parse_block_quote`, which presumably consumes the nested end
                // tag itself, so this counter may never get back to zero for the
                // outer quote — confirm.
                Event::Start(Tag::BlockQuote) => {
                    nested_depth += 1;
                }
                Event::End(TagEnd::BlockQuote) => {
                    nested_depth -= 1;
                    if nested_depth == 0 {
                        self.cursor += 1;
                        break;
                    }
                }
                _ => {}
            };
        }

        ParsedMarkdownBlockQuote {
            source_range,
            children,
        }
    }
 622
 623    async fn parse_code_block(&mut self, language: Option<String>) -> ParsedMarkdownCodeBlock {
 624        let (_event, source_range) = self.previous().unwrap();
 625        let source_range = source_range.clone();
 626        let mut code = String::new();
 627
 628        while !self.eof() {
 629            let (current, _source_range) = self.current().unwrap();
 630            match current {
 631                Event::Text(text) => {
 632                    code.push_str(&text);
 633                    self.cursor += 1;
 634                }
 635                Event::End(TagEnd::CodeBlock) => {
 636                    self.cursor += 1;
 637                    break;
 638                }
 639                _ => {
 640                    break;
 641                }
 642            }
 643        }
 644
 645        let highlights = if let Some(language) = &language {
 646            if let Some(registry) = &self.language_registry {
 647                let rope: language::Rope = code.as_str().into();
 648                registry
 649                    .language_for_name_or_extension(language)
 650                    .await
 651                    .map(|l| l.highlight_text(&rope, 0..code.len()))
 652                    .ok()
 653            } else {
 654                None
 655            }
 656        } else {
 657            None
 658        };
 659
 660        ParsedMarkdownCodeBlock {
 661            source_range,
 662            contents: code.trim().to_string().into(),
 663            language,
 664            highlights,
 665        }
 666    }
 667}
 668
 669#[cfg(test)]
 670mod tests {
 671    use super::*;
 672
 673    use gpui::BackgroundExecutor;
 674    use language::{tree_sitter_rust, HighlightId, Language, LanguageConfig, LanguageMatcher};
 675    use pretty_assertions::assert_eq;
 676
 677    use ParsedMarkdownElement::*;
 678    use ParsedMarkdownListItemType::*;
 679
 680    async fn parse(input: &str) -> ParsedMarkdown {
 681        parse_markdown(input, None, None).await
 682    }
 683
 684    #[gpui::test]
 685    async fn test_headings() {
 686        let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
 687
 688        assert_eq!(
 689            parsed.children,
 690            vec![
 691                h1(text("Heading one", 0..14), 0..14),
 692                h2(text("Heading two", 14..29), 14..29),
 693                h3(text("Heading three", 29..46), 29..46),
 694            ]
 695        );
 696    }
 697
 698    #[gpui::test]
 699    async fn test_newlines_dont_new_paragraphs() {
 700        let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
 701
 702        assert_eq!(
 703            parsed.children,
 704            vec![p("Some text that is bolded and italicized", 0..46)]
 705        );
 706    }
 707
 708    #[gpui::test]
 709    async fn test_heading_with_paragraph() {
 710        let parsed = parse("# Zed\nThe editor").await;
 711
 712        assert_eq!(
 713            parsed.children,
 714            vec![h1(text("Zed", 0..6), 0..6), p("The editor", 6..16),]
 715        );
 716    }
 717
 718    #[gpui::test]
 719    async fn test_double_newlines_do_new_paragraphs() {
 720        let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
 721
 722        assert_eq!(
 723            parsed.children,
 724            vec![
 725                p("Some text that is bolded", 0..29),
 726                p("and italicized", 31..47),
 727            ]
 728        );
 729    }
 730
 731    #[gpui::test]
 732    async fn test_bold_italic_text() {
 733        let parsed = parse("Some text **that is bolded** and *italicized*").await;
 734
 735        assert_eq!(
 736            parsed.children,
 737            vec![p("Some text that is bolded and italicized", 0..45)]
 738        );
 739    }
 740
 741    #[gpui::test]
 742    async fn test_nested_bold_strikethrough_text() {
 743        let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
 744
 745        assert_eq!(parsed.children.len(), 1);
 746        assert_eq!(
 747            parsed.children[0],
 748            ParsedMarkdownElement::Paragraph(ParsedMarkdownText {
 749                source_range: 0..35,
 750                contents: "Some bostrikethroughld text".to_string(),
 751                highlights: Vec::new(),
 752                region_ranges: Vec::new(),
 753                regions: Vec::new(),
 754            })
 755        );
 756
 757        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 758            text
 759        } else {
 760            panic!("Expected a paragraph");
 761        };
 762        assert_eq!(
 763            paragraph.highlights,
 764            vec![
 765                (
 766                    5..7,
 767                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 768                        weight: FontWeight::BOLD,
 769                        ..Default::default()
 770                    }),
 771                ),
 772                (
 773                    7..20,
 774                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 775                        weight: FontWeight::BOLD,
 776                        strikethrough: true,
 777                        ..Default::default()
 778                    }),
 779                ),
 780                (
 781                    20..22,
 782                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 783                        weight: FontWeight::BOLD,
 784                        ..Default::default()
 785                    }),
 786                ),
 787            ]
 788        );
 789    }
 790
 791    #[gpui::test]
 792    async fn test_raw_links_detection() {
 793        let parsed = parse("Checkout this https://zed.dev link").await;
 794
 795        assert_eq!(
 796            parsed.children,
 797            vec![p("Checkout this https://zed.dev link", 0..34)]
 798        );
 799
 800        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 801            text
 802        } else {
 803            panic!("Expected a paragraph");
 804        };
 805        assert_eq!(
 806            paragraph.highlights,
 807            vec![(
 808                14..29,
 809                MarkdownHighlight::Style(MarkdownHighlightStyle {
 810                    underline: true,
 811                    ..Default::default()
 812                }),
 813            )]
 814        );
 815        assert_eq!(
 816            paragraph.regions,
 817            vec![ParsedRegion {
 818                code: false,
 819                link: Some(Link::Web {
 820                    url: "https://zed.dev".to_string()
 821                }),
 822            }]
 823        );
 824        assert_eq!(paragraph.region_ranges, vec![14..29]);
 825    }
 826
 827    #[gpui::test]
 828    async fn test_header_only_table() {
 829        let markdown = "\
 830| Header 1 | Header 2 |
 831|----------|----------|
 832
 833Some other content
 834";
 835
 836        let expected_table = table(
 837            0..48,
 838            row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
 839            vec![],
 840        );
 841
 842        assert_eq!(
 843            parse(markdown).await.children[0],
 844            ParsedMarkdownElement::Table(expected_table)
 845        );
 846    }
 847
 848    #[gpui::test]
 849    async fn test_basic_table() {
 850        let markdown = "\
 851| Header 1 | Header 2 |
 852|----------|----------|
 853| Cell 1   | Cell 2   |
 854| Cell 3   | Cell 4   |";
 855
 856        let expected_table = table(
 857            0..95,
 858            row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
 859            vec![
 860                row(vec![text("Cell 1", 49..59), text("Cell 2", 60..70)]),
 861                row(vec![text("Cell 3", 73..83), text("Cell 4", 84..94)]),
 862            ],
 863        );
 864
 865        assert_eq!(
 866            parse(markdown).await.children[0],
 867            ParsedMarkdownElement::Table(expected_table)
 868        );
 869    }
 870
 871    #[gpui::test]
 872    async fn test_list_basic() {
 873        let parsed = parse(
 874            "\
 875* Item 1
 876* Item 2
 877* Item 3
 878",
 879        )
 880        .await;
 881
 882        assert_eq!(
 883            parsed.children,
 884            vec![list(
 885                vec![
 886                    list_item(1, Unordered, vec![p("Item 1", 0..9)]),
 887                    list_item(1, Unordered, vec![p("Item 2", 9..18)]),
 888                    list_item(1, Unordered, vec![p("Item 3", 18..27)]),
 889                ],
 890                0..27
 891            ),]
 892        );
 893    }
 894
 895    #[gpui::test]
 896    async fn test_list_with_tasks() {
 897        let parsed = parse(
 898            "\
 899- [ ] TODO
 900- [x] Checked
 901",
 902        )
 903        .await;
 904
 905        assert_eq!(
 906            parsed.children,
 907            vec![list(
 908                vec![
 909                    list_item(1, Task(false, 2..5), vec![p("TODO", 2..5)]),
 910                    list_item(1, Task(true, 13..16), vec![p("Checked", 13..16)]),
 911                ],
 912                0..25
 913            ),]
 914        );
 915    }
 916
 917    #[gpui::test]
 918    async fn test_list_with_linebreak_is_handled_correctly() {
 919        let parsed = parse(
 920            "\
 921- [ ] Task 1
 922
 923- [x] Task 2
 924",
 925        )
 926        .await;
 927
 928        assert_eq!(
 929            parsed.children,
 930            vec![list(
 931                vec![
 932                    list_item(1, Task(false, 2..5), vec![p("Task 1", 2..5)]),
 933                    list_item(1, Task(true, 16..19), vec![p("Task 2", 16..19)]),
 934                ],
 935                0..27
 936            ),]
 937        );
 938    }
 939
 940    #[gpui::test]
 941    async fn test_list_nested() {
 942        let parsed = parse(
 943            "\
 944* Item 1
 945* Item 2
 946* Item 3
 947
 9481. Hello
 9491. Two
 950   1. Three
 9512. Four
 9523. Five
 953
 954* First
 955  1. Hello
 956     1. Goodbyte
 957        - Inner
 958        - Inner
 959  2. Goodbyte
 960* Last
 961",
 962        )
 963        .await;
 964
 965        assert_eq!(
 966            parsed.children,
 967            vec![
 968                list(
 969                    vec![
 970                        list_item(1, Unordered, vec![p("Item 1", 0..9)]),
 971                        list_item(1, Unordered, vec![p("Item 2", 9..18)]),
 972                        list_item(1, Unordered, vec![p("Item 3", 18..28)]),
 973                    ],
 974                    0..28
 975                ),
 976                list(
 977                    vec![
 978                        list_item(1, Ordered(1), vec![p("Hello", 28..37)]),
 979                        list_item(
 980                            1,
 981                            Ordered(2),
 982                            vec![
 983                                p("Two", 37..56),
 984                                list(
 985                                    vec![list_item(2, Ordered(1), vec![p("Three", 47..56)]),],
 986                                    47..56
 987                                ),
 988                            ]
 989                        ),
 990                        list_item(1, Ordered(3), vec![p("Four", 56..64)]),
 991                        list_item(1, Ordered(4), vec![p("Five", 64..73)]),
 992                    ],
 993                    28..73
 994                ),
 995                list(
 996                    vec![
 997                        list_item(
 998                            1,
 999                            Unordered,
1000                            vec![
1001                                p("First", 73..155),
1002                                list(
1003                                    vec![
1004                                        list_item(
1005                                            2,
1006                                            Ordered(1),
1007                                            vec![
1008                                                p("Hello", 83..141),
1009                                                list(
1010                                                    vec![list_item(
1011                                                        3,
1012                                                        Ordered(1),
1013                                                        vec![
1014                                                            p("Goodbyte", 97..141),
1015                                                            list(
1016                                                                vec![
1017                                                                    list_item(
1018                                                                        4,
1019                                                                        Unordered,
1020                                                                        vec![p("Inner", 117..125)]
1021                                                                    ),
1022                                                                    list_item(
1023                                                                        4,
1024                                                                        Unordered,
1025                                                                        vec![p("Inner", 133..141)]
1026                                                                    ),
1027                                                                ],
1028                                                                117..141
1029                                                            )
1030                                                        ]
1031                                                    ),],
1032                                                    97..141
1033                                                )
1034                                            ]
1035                                        ),
1036                                        list_item(2, Ordered(2), vec![p("Goodbyte", 143..155)]),
1037                                    ],
1038                                    83..155
1039                                )
1040                            ]
1041                        ),
1042                        list_item(1, Unordered, vec![p("Last", 155..162)]),
1043                    ],
1044                    73..162
1045                ),
1046            ]
1047        );
1048    }
1049
1050    #[gpui::test]
1051    async fn test_list_with_nested_content() {
1052        let parsed = parse(
1053            "\
1054*   This is a list item with two paragraphs.
1055
1056    This is the second paragraph in the list item.",
1057        )
1058        .await;
1059
1060        assert_eq!(
1061            parsed.children,
1062            vec![list(
1063                vec![list_item(
1064                    1,
1065                    Unordered,
1066                    vec![
1067                        p("This is a list item with two paragraphs.", 4..45),
1068                        p("This is the second paragraph in the list item.", 50..96)
1069                    ],
1070                ),],
1071                0..96,
1072            ),]
1073        );
1074    }
1075
1076    #[gpui::test]
1077    async fn test_list_with_leading_text() {
1078        let parsed = parse(
1079            "\
1080* `code`
1081* **bold**
1082* [link](https://example.com)
1083",
1084        )
1085        .await;
1086
1087        assert_eq!(
1088            parsed.children,
1089            vec![list(
1090                vec![
1091                    list_item(1, Unordered, vec![p("code", 0..9)],),
1092                    list_item(1, Unordered, vec![p("bold", 9..20)]),
1093                    list_item(1, Unordered, vec![p("link", 20..50)],)
1094                ],
1095                0..50,
1096            ),]
1097        );
1098    }
1099
1100    #[gpui::test]
1101    async fn test_simple_block_quote() {
1102        let parsed = parse("> Simple block quote with **styled text**").await;
1103
1104        assert_eq!(
1105            parsed.children,
1106            vec![block_quote(
1107                vec![p("Simple block quote with styled text", 2..41)],
1108                0..41
1109            )]
1110        );
1111    }
1112
1113    #[gpui::test]
1114    async fn test_simple_block_quote_with_multiple_lines() {
1115        let parsed = parse(
1116            "\
1117> # Heading
1118> More
1119> text
1120>
1121> More text
1122",
1123        )
1124        .await;
1125
1126        assert_eq!(
1127            parsed.children,
1128            vec![block_quote(
1129                vec![
1130                    h1(text("Heading", 2..12), 2..12),
1131                    p("More text", 14..26),
1132                    p("More text", 30..40)
1133                ],
1134                0..40
1135            )]
1136        );
1137    }
1138
1139    #[gpui::test]
1140    async fn test_nested_block_quote() {
1141        let parsed = parse(
1142            "\
1143> A
1144>
1145> > # B
1146>
1147> C
1148
1149More text
1150",
1151        )
1152        .await;
1153
1154        assert_eq!(
1155            parsed.children,
1156            vec![
1157                block_quote(
1158                    vec![
1159                        p("A", 2..4),
1160                        block_quote(vec![h1(text("B", 10..14), 10..14)], 8..14),
1161                        p("C", 18..20)
1162                    ],
1163                    0..20
1164                ),
1165                p("More text", 21..31)
1166            ]
1167        );
1168    }
1169
1170    #[gpui::test]
1171    async fn test_code_block() {
1172        let parsed = parse(
1173            "\
1174```
1175fn main() {
1176    return 0;
1177}
1178```
1179",
1180        )
1181        .await;
1182
1183        assert_eq!(
1184            parsed.children,
1185            vec![code_block(
1186                None,
1187                "fn main() {\n    return 0;\n}",
1188                0..35,
1189                None
1190            )]
1191        );
1192    }
1193
1194    #[gpui::test]
1195    async fn test_code_block_with_language(executor: BackgroundExecutor) {
1196        let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
1197        language_registry.add(rust_lang());
1198
1199        let parsed = parse_markdown(
1200            "\
1201```rust
1202fn main() {
1203    return 0;
1204}
1205```
1206",
1207            None,
1208            Some(language_registry),
1209        )
1210        .await;
1211
1212        assert_eq!(
1213            parsed.children,
1214            vec![code_block(
1215                Some("rust".to_string()),
1216                "fn main() {\n    return 0;\n}",
1217                0..39,
1218                Some(vec![])
1219            )]
1220        );
1221    }
1222
1223    fn rust_lang() -> Arc<Language> {
1224        Arc::new(Language::new(
1225            LanguageConfig {
1226                name: "Rust".into(),
1227                matcher: LanguageMatcher {
1228                    path_suffixes: vec!["rs".into()],
1229                    ..Default::default()
1230                },
1231                collapsed_placeholder: " /* ... */ ".to_string(),
1232                ..Default::default()
1233            },
1234            Some(tree_sitter_rust::language()),
1235        ))
1236    }
1237
1238    fn h1(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1239        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1240            source_range,
1241            level: HeadingLevel::H1,
1242            contents,
1243        })
1244    }
1245
1246    fn h2(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1247        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1248            source_range,
1249            level: HeadingLevel::H2,
1250            contents,
1251        })
1252    }
1253
1254    fn h3(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1255        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1256            source_range,
1257            level: HeadingLevel::H3,
1258            contents,
1259        })
1260    }
1261
1262    fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
1263        ParsedMarkdownElement::Paragraph(text(contents, source_range))
1264    }
1265
1266    fn text(contents: &str, source_range: Range<usize>) -> ParsedMarkdownText {
1267        ParsedMarkdownText {
1268            highlights: Vec::new(),
1269            region_ranges: Vec::new(),
1270            regions: Vec::new(),
1271            source_range,
1272            contents: contents.to_string(),
1273        }
1274    }
1275
1276    fn block_quote(
1277        children: Vec<ParsedMarkdownElement>,
1278        source_range: Range<usize>,
1279    ) -> ParsedMarkdownElement {
1280        ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
1281            source_range,
1282            children: children.into_iter().map(Box::new).collect(),
1283        })
1284    }
1285
1286    fn code_block(
1287        language: Option<String>,
1288        code: &str,
1289        source_range: Range<usize>,
1290        highlights: Option<Vec<(Range<usize>, HighlightId)>>,
1291    ) -> ParsedMarkdownElement {
1292        ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
1293            source_range,
1294            language,
1295            contents: code.to_string().into(),
1296            highlights,
1297        })
1298    }
1299
1300    fn list(
1301        children: Vec<ParsedMarkdownListItem>,
1302        source_range: Range<usize>,
1303    ) -> ParsedMarkdownElement {
1304        List(ParsedMarkdownList {
1305            source_range,
1306            children,
1307        })
1308    }
1309
1310    fn list_item(
1311        depth: u16,
1312        item_type: ParsedMarkdownListItemType,
1313        contents: Vec<ParsedMarkdownElement>,
1314    ) -> ParsedMarkdownListItem {
1315        ParsedMarkdownListItem {
1316            item_type,
1317            depth,
1318            contents: contents.into_iter().map(Box::new).collect(),
1319        }
1320    }
1321
1322    fn table(
1323        source_range: Range<usize>,
1324        header: ParsedMarkdownTableRow,
1325        body: Vec<ParsedMarkdownTableRow>,
1326    ) -> ParsedMarkdownTable {
1327        ParsedMarkdownTable {
1328            column_alignments: Vec::new(),
1329            source_range,
1330            header,
1331            body,
1332        }
1333    }
1334
1335    fn row(children: Vec<ParsedMarkdownText>) -> ParsedMarkdownTableRow {
1336        ParsedMarkdownTableRow { children }
1337    }
1338
1339    impl PartialEq for ParsedMarkdownTable {
1340        fn eq(&self, other: &Self) -> bool {
1341            self.source_range == other.source_range
1342                && self.header == other.header
1343                && self.body == other.body
1344        }
1345    }
1346
1347    impl PartialEq for ParsedMarkdownText {
1348        fn eq(&self, other: &Self) -> bool {
1349            self.source_range == other.source_range && self.contents == other.contents
1350        }
1351    }
1352}