markdown_parser.rs

   1use crate::markdown_elements::*;
   2use async_recursion::async_recursion;
   3use collections::FxHashMap;
   4use gpui::FontWeight;
   5use language::LanguageRegistry;
   6use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
   7use std::{ops::Range, path::PathBuf, sync::Arc};
   8
   9pub async fn parse_markdown(
  10    markdown_input: &str,
  11    file_location_directory: Option<PathBuf>,
  12    language_registry: Option<Arc<LanguageRegistry>>,
  13) -> ParsedMarkdown {
  14    let mut options = Options::all();
  15    options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
  16
  17    let parser = Parser::new_ext(markdown_input, options);
  18    let parser = MarkdownParser::new(
  19        parser.into_offset_iter().collect(),
  20        file_location_directory,
  21        language_registry,
  22    );
  23    let renderer = parser.parse_document().await;
  24    ParsedMarkdown {
  25        children: renderer.parsed,
  26    }
  27}
  28
/// Cursor-based parser that turns a pre-collected pulldown-cmark event stream
/// into `ParsedMarkdownElement`s.
struct MarkdownParser<'a> {
    /// Every event of the document, paired with the source range it came from
    tokens: Vec<(Event<'a>, Range<usize>)>,
    /// The current index in the tokens array
    cursor: usize,
    /// The blocks that we have successfully parsed so far
    parsed: Vec<ParsedMarkdownElement>,
    /// Passed to `Link::identify` whenever a link destination is encountered
    file_location_directory: Option<PathBuf>,
    /// Used to look up a language when highlighting code blocks; no
    /// highlighting is attempted when this is `None`
    language_registry: Option<Arc<LanguageRegistry>>,
}
  38
/// A list item that is still being assembled while its events are consumed.
struct MarkdownListItem {
    /// The elements collected for this item so far
    content: Vec<ParsedMarkdownElement>,
    /// Whether the item is unordered, ordered, or a task
    item_type: ParsedMarkdownListItemType,
}
  43
  44impl Default for MarkdownListItem {
  45    fn default() -> Self {
  46        Self {
  47            content: Vec::new(),
  48            item_type: ParsedMarkdownListItemType::Unordered,
  49        }
  50    }
  51}
  52
  53impl<'a> MarkdownParser<'a> {
  54    fn new(
  55        tokens: Vec<(Event<'a>, Range<usize>)>,
  56        file_location_directory: Option<PathBuf>,
  57        language_registry: Option<Arc<LanguageRegistry>>,
  58    ) -> Self {
  59        Self {
  60            tokens,
  61            file_location_directory,
  62            language_registry,
  63            cursor: 0,
  64            parsed: vec![],
  65        }
  66    }
  67
  68    fn eof(&self) -> bool {
  69        if self.tokens.is_empty() {
  70            return true;
  71        }
  72        self.cursor >= self.tokens.len() - 1
  73    }
  74
  75    fn peek(&self, steps: usize) -> Option<&(Event, Range<usize>)> {
  76        if self.eof() || (steps + self.cursor) >= self.tokens.len() {
  77            return self.tokens.last();
  78        }
  79        return self.tokens.get(self.cursor + steps);
  80    }
  81
  82    fn previous(&self) -> Option<&(Event, Range<usize>)> {
  83        if self.cursor == 0 || self.cursor > self.tokens.len() {
  84            return None;
  85        }
  86        return self.tokens.get(self.cursor - 1);
  87    }
  88
  89    fn current(&self) -> Option<&(Event, Range<usize>)> {
  90        return self.peek(0);
  91    }
  92
  93    fn current_event(&self) -> Option<&Event> {
  94        return self.current().map(|(event, _)| event);
  95    }
  96
  97    fn is_text_like(event: &Event) -> bool {
  98        match event {
  99            Event::Text(_)
 100            // Represent an inline code block
 101            | Event::Code(_)
 102            | Event::Html(_)
 103            | Event::FootnoteReference(_)
 104            | Event::Start(Tag::Link { link_type: _, dest_url: _, title: _, id: _ })
 105            | Event::Start(Tag::Emphasis)
 106            | Event::Start(Tag::Strong)
 107            | Event::Start(Tag::Strikethrough)
 108            | Event::Start(Tag::Image { link_type: _, dest_url: _, title: _, id: _ }) => {
 109                true
 110            }
 111            _ => false,
 112        }
 113    }
 114
 115    async fn parse_document(mut self) -> Self {
 116        while !self.eof() {
 117            if let Some(block) = self.parse_block().await {
 118                self.parsed.extend(block);
 119            } else {
 120                self.cursor += 1;
 121            }
 122        }
 123        self
 124    }
 125
 126    #[async_recursion]
 127    async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
 128        let (current, source_range) = self.current().unwrap();
 129        let source_range = source_range.clone();
 130        match current {
 131            Event::Start(tag) => match tag {
 132                Tag::Paragraph => {
 133                    self.cursor += 1;
 134                    let text = self.parse_text(false, Some(source_range));
 135                    Some(vec![ParsedMarkdownElement::Paragraph(text)])
 136                }
 137                Tag::Heading {
 138                    level,
 139                    id: _,
 140                    classes: _,
 141                    attrs: _,
 142                } => {
 143                    let level = *level;
 144                    self.cursor += 1;
 145                    let heading = self.parse_heading(level);
 146                    Some(vec![ParsedMarkdownElement::Heading(heading)])
 147                }
 148                Tag::Table(alignment) => {
 149                    let alignment = alignment.clone();
 150                    self.cursor += 1;
 151                    let table = self.parse_table(alignment);
 152                    Some(vec![ParsedMarkdownElement::Table(table)])
 153                }
 154                Tag::List(order) => {
 155                    let order = *order;
 156                    self.cursor += 1;
 157                    let list = self.parse_list(order).await;
 158                    Some(list)
 159                }
 160                Tag::BlockQuote(_kind) => {
 161                    self.cursor += 1;
 162                    let block_quote = self.parse_block_quote().await;
 163                    Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
 164                }
 165                Tag::CodeBlock(kind) => {
 166                    let language = match kind {
 167                        pulldown_cmark::CodeBlockKind::Indented => None,
 168                        pulldown_cmark::CodeBlockKind::Fenced(language) => {
 169                            if language.is_empty() {
 170                                None
 171                            } else {
 172                                Some(language.to_string())
 173                            }
 174                        }
 175                    };
 176
 177                    self.cursor += 1;
 178
 179                    let code_block = self.parse_code_block(language).await;
 180                    Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
 181                }
 182                _ => None,
 183            },
 184            Event::Rule => {
 185                let source_range = source_range.clone();
 186                self.cursor += 1;
 187                Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
 188            }
 189            _ => None,
 190        }
 191    }
 192
 193    fn parse_text(
 194        &mut self,
 195        should_complete_on_soft_break: bool,
 196        source_range: Option<Range<usize>>,
 197    ) -> ParsedMarkdownText {
 198        let source_range = source_range.unwrap_or_else(|| {
 199            self.current()
 200                .map(|(_, range)| range.clone())
 201                .unwrap_or_default()
 202        });
 203
 204        let mut text = String::new();
 205        let mut bold_depth = 0;
 206        let mut italic_depth = 0;
 207        let mut strikethrough_depth = 0;
 208        let mut link: Option<Link> = None;
 209        let mut region_ranges: Vec<Range<usize>> = vec![];
 210        let mut regions: Vec<ParsedRegion> = vec![];
 211        let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
 212
 213        let mut link_urls: Vec<String> = vec![];
 214        let mut link_ranges: Vec<Range<usize>> = vec![];
 215
 216        loop {
 217            if self.eof() {
 218                break;
 219            }
 220
 221            let (current, _source_range) = self.current().unwrap();
 222            let prev_len = text.len();
 223            match current {
 224                Event::SoftBreak => {
 225                    if should_complete_on_soft_break {
 226                        break;
 227                    }
 228
 229                    // `Some text\nSome more text` should be treated as a single line.
 230                    text.push(' ');
 231                }
 232
 233                Event::HardBreak => {
 234                    text.push('\n');
 235                }
 236
 237                Event::Text(t) => {
 238                    text.push_str(t.as_ref());
 239
 240                    let mut style = MarkdownHighlightStyle::default();
 241
 242                    if bold_depth > 0 {
 243                        style.weight = FontWeight::BOLD;
 244                    }
 245
 246                    if italic_depth > 0 {
 247                        style.italic = true;
 248                    }
 249
 250                    if strikethrough_depth > 0 {
 251                        style.strikethrough = true;
 252                    }
 253
 254                    let last_run_len = if let Some(link) = link.clone() {
 255                        region_ranges.push(prev_len..text.len());
 256                        regions.push(ParsedRegion {
 257                            code: false,
 258                            link: Some(link),
 259                        });
 260                        style.underline = true;
 261                        prev_len
 262                    } else {
 263                        // Manually scan for links
 264                        let mut finder = linkify::LinkFinder::new();
 265                        finder.kinds(&[linkify::LinkKind::Url]);
 266                        let mut last_link_len = prev_len;
 267                        for link in finder.links(t) {
 268                            let start = link.start();
 269                            let end = link.end();
 270                            let range = (prev_len + start)..(prev_len + end);
 271                            link_ranges.push(range.clone());
 272                            link_urls.push(link.as_str().to_string());
 273
 274                            // If there is a style before we match a link, we have to add this to the highlighted ranges
 275                            if style != MarkdownHighlightStyle::default()
 276                                && last_link_len < link.start()
 277                            {
 278                                highlights.push((
 279                                    last_link_len..link.start(),
 280                                    MarkdownHighlight::Style(style.clone()),
 281                                ));
 282                            }
 283
 284                            highlights.push((
 285                                range.clone(),
 286                                MarkdownHighlight::Style(MarkdownHighlightStyle {
 287                                    underline: true,
 288                                    ..style
 289                                }),
 290                            ));
 291                            region_ranges.push(range.clone());
 292                            regions.push(ParsedRegion {
 293                                code: false,
 294                                link: Some(Link::Web {
 295                                    url: link.as_str().to_string(),
 296                                }),
 297                            });
 298
 299                            last_link_len = end;
 300                        }
 301                        last_link_len
 302                    };
 303
 304                    if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
 305                        let mut new_highlight = true;
 306                        if let Some((last_range, last_style)) = highlights.last_mut() {
 307                            if last_range.end == last_run_len
 308                                && last_style == &MarkdownHighlight::Style(style.clone())
 309                            {
 310                                last_range.end = text.len();
 311                                new_highlight = false;
 312                            }
 313                        }
 314                        if new_highlight {
 315                            highlights
 316                                .push((last_run_len..text.len(), MarkdownHighlight::Style(style)));
 317                        }
 318                    }
 319                }
 320
 321                // Note: This event means "inline code" and not "code block"
 322                Event::Code(t) => {
 323                    text.push_str(t.as_ref());
 324                    region_ranges.push(prev_len..text.len());
 325
 326                    if link.is_some() {
 327                        highlights.push((
 328                            prev_len..text.len(),
 329                            MarkdownHighlight::Style(MarkdownHighlightStyle {
 330                                underline: true,
 331                                ..Default::default()
 332                            }),
 333                        ));
 334                    }
 335
 336                    regions.push(ParsedRegion {
 337                        code: true,
 338                        link: link.clone(),
 339                    });
 340                }
 341
 342                Event::Start(tag) => match tag {
 343                    Tag::Emphasis => italic_depth += 1,
 344                    Tag::Strong => bold_depth += 1,
 345                    Tag::Strikethrough => strikethrough_depth += 1,
 346                    Tag::Link {
 347                        link_type: _,
 348                        dest_url,
 349                        title: _,
 350                        id: _,
 351                    } => {
 352                        link = Link::identify(
 353                            self.file_location_directory.clone(),
 354                            dest_url.to_string(),
 355                        );
 356                    }
 357                    _ => {
 358                        break;
 359                    }
 360                },
 361
 362                Event::End(tag) => match tag {
 363                    TagEnd::Emphasis => {
 364                        italic_depth -= 1;
 365                    }
 366                    TagEnd::Strong => {
 367                        bold_depth -= 1;
 368                    }
 369                    TagEnd::Strikethrough => {
 370                        strikethrough_depth -= 1;
 371                    }
 372                    TagEnd::Link => {
 373                        link = None;
 374                    }
 375                    TagEnd::Paragraph => {
 376                        self.cursor += 1;
 377                        break;
 378                    }
 379                    _ => {
 380                        break;
 381                    }
 382                },
 383
 384                _ => {
 385                    break;
 386                }
 387            }
 388
 389            self.cursor += 1;
 390        }
 391
 392        ParsedMarkdownText {
 393            source_range,
 394            contents: text,
 395            highlights,
 396            regions,
 397            region_ranges,
 398        }
 399    }
 400
 401    fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
 402        let (_event, source_range) = self.previous().unwrap();
 403        let source_range = source_range.clone();
 404        let text = self.parse_text(true, None);
 405
 406        // Advance past the heading end tag
 407        self.cursor += 1;
 408
 409        ParsedMarkdownHeading {
 410            source_range: source_range.clone(),
 411            level: match level {
 412                pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
 413                pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
 414                pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
 415                pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
 416                pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
 417                pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
 418            },
 419            contents: text,
 420        }
 421    }
 422
 423    fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
 424        let (_event, source_range) = self.previous().unwrap();
 425        let source_range = source_range.clone();
 426        let mut header = ParsedMarkdownTableRow::new();
 427        let mut body = vec![];
 428        let mut current_row = vec![];
 429        let mut in_header = true;
 430        let column_alignments = alignment.iter().map(Self::convert_alignment).collect();
 431
 432        loop {
 433            if self.eof() {
 434                break;
 435            }
 436
 437            let (current, source_range) = self.current().unwrap();
 438            let source_range = source_range.clone();
 439            match current {
 440                Event::Start(Tag::TableHead)
 441                | Event::Start(Tag::TableRow)
 442                | Event::End(TagEnd::TableCell) => {
 443                    self.cursor += 1;
 444                }
 445                Event::Start(Tag::TableCell) => {
 446                    self.cursor += 1;
 447                    let cell_contents = self.parse_text(false, Some(source_range));
 448                    current_row.push(cell_contents);
 449                }
 450                Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
 451                    self.cursor += 1;
 452                    let new_row = std::mem::take(&mut current_row);
 453                    if in_header {
 454                        header.children = new_row;
 455                        in_header = false;
 456                    } else {
 457                        let row = ParsedMarkdownTableRow::with_children(new_row);
 458                        body.push(row);
 459                    }
 460                }
 461                Event::End(TagEnd::Table) => {
 462                    self.cursor += 1;
 463                    break;
 464                }
 465                _ => {
 466                    break;
 467                }
 468            }
 469        }
 470
 471        ParsedMarkdownTable {
 472            source_range,
 473            header,
 474            body,
 475            column_alignments,
 476        }
 477    }
 478
 479    fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
 480        match alignment {
 481            Alignment::None => ParsedMarkdownTableAlignment::None,
 482            Alignment::Left => ParsedMarkdownTableAlignment::Left,
 483            Alignment::Center => ParsedMarkdownTableAlignment::Center,
 484            Alignment::Right => ParsedMarkdownTableAlignment::Right,
 485        }
 486    }
 487
    /// Parses the list whose start tag was just consumed and returns it as a
    /// flat sequence of elements.
    ///
    /// Nested lists are not represented as a hierarchy: every item becomes a
    /// `ParsedMarkdownElement::ListItem` carrying its nesting `depth`, and
    /// nested lists are handled inside this same loop by adjusting `depth`.
    ///
    /// `order` is the list's starting number taken from its start tag: items
    /// are typed `Ordered` when it is `Some` and `Unordered` when it is
    /// `None`. It is incremented after every item and saved/restored around
    /// nested lists.
    ///
    /// Panics if there is no token before the cursor.
    async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
        let (_, list_source_range) = self.previous().unwrap();

        // Finished items, in output order.
        let mut items = Vec::new();
        // Items that have been opened but not yet closed, innermost last.
        let mut items_stack = vec![MarkdownListItem::default()];
        let mut depth = 1;
        let mut order = order;
        // Numbering of the enclosing lists while a nested list is being parsed.
        let mut order_stack = Vec::new();

        // An item is only finished at its end tag, i.e. after the items of any
        // list nested inside it have already been pushed. For each depth this
        // remembers where in `items` such a parent item has to be inserted so
        // that it ends up before its nested children.
        let mut insertion_indices = FxHashMap::default();
        // For each depth, the source range of an item that contains a nested list.
        let mut source_ranges = FxHashMap::default();
        let mut start_item_range = list_source_range.clone();

        while !self.eof() {
            let (current, source_range) = self.current().unwrap();
            match current {
                Event::Start(Tag::List(new_order)) => {
                    if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
                        insertion_indices.insert(depth, items.len());
                    }

                    // We will use the start of the nested list as the end for the current item's range,
                    // because we don't care about the hierarchy of list items
                    if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
                        e.insert(start_item_range.start..source_range.start);
                    }

                    order_stack.push(order);
                    order = *new_order;
                    self.cursor += 1;
                    depth += 1;
                }
                Event::End(TagEnd::List(_)) => {
                    // Resume the numbering of the enclosing list, if there is one.
                    order = order_stack.pop().flatten();
                    self.cursor += 1;
                    depth -= 1;

                    // Closing the outermost list ends this call.
                    if depth == 0 {
                        break;
                    }
                }
                Event::Start(Tag::Item) => {
                    start_item_range = source_range.clone();

                    self.cursor += 1;
                    items_stack.push(MarkdownListItem::default());

                    let mut task_list = None;
                    // Check for task list marker (`- [ ]` or `- [x]`)
                    if let Some(event) = self.current_event() {
                        // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
                        if event == &Event::Start(Tag::Paragraph) {
                            self.cursor += 1;
                        }

                        if let Some((Event::TaskListMarker(checked), range)) = self.current() {
                            task_list = Some((*checked, range.clone()));
                            self.cursor += 1;
                        }
                    }

                    if let Some((event, range)) = self.current() {
                        // This is a plain list item.
                        // For example `- some text` or `1. [Docs](./docs.md)`
                        if MarkdownParser::is_text_like(event) {
                            let text = self.parse_text(false, Some(range.clone()));
                            let block = ParsedMarkdownElement::Paragraph(text);
                            if let Some(content) = items_stack.last_mut() {
                                // A task marker takes precedence over list numbering.
                                let item_type = if let Some((checked, range)) = task_list {
                                    ParsedMarkdownListItemType::Task(checked, range)
                                } else if let Some(order) = order {
                                    ParsedMarkdownListItemType::Ordered(order)
                                } else {
                                    ParsedMarkdownListItemType::Unordered
                                };
                                content.item_type = item_type;
                                content.content.push(block);
                            }
                        } else {
                            // The item starts with a block (for example a code block):
                            // parse it and attach it to the item, whose type keeps its
                            // default value.
                            let block = self.parse_block().await;
                            if let Some(block) = block {
                                if let Some(list_item) = items_stack.last_mut() {
                                    list_item.content.extend(block);
                                }
                            }
                        }
                    }

                    // Skip a paragraph end tag that may still be under the cursor
                    // (the item's text was wrapped in a paragraph).
                    if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
                        self.cursor += 1;
                    }
                }
                Event::End(TagEnd::Item) => {
                    self.cursor += 1;

                    if let Some(current) = order {
                        order = Some(current + 1);
                    }

                    if let Some(list_item) = items_stack.pop() {
                        // Prefer the range recorded when a nested list started inside
                        // this item; otherwise use the item's own range.
                        let source_range = source_ranges
                            .remove(&depth)
                            .unwrap_or(start_item_range.clone());

                        // We need to remove the last character of the source range, because it includes the newline character
                        let source_range = source_range.start..source_range.end - 1;
                        let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
                            source_range,
                            content: list_item.content,
                            depth,
                            item_type: list_item.item_type,
                        });

                        // An item that contained a nested list goes before the nested
                        // items that were pushed while it was still open.
                        if let Some(index) = insertion_indices.get(&depth) {
                            items.insert(*index, item);
                            insertion_indices.remove(&depth);
                        } else {
                            items.push(item);
                        }
                    }
                }
                _ => {
                    if depth == 0 {
                        break;
                    }
                    // This can only happen if a list item starts with more than one paragraph,
                    // or the list item contains blocks that should be rendered after the nested list items
                    let block = self.parse_block().await;
                    if let Some(block) = block {
                        if let Some(list_item) = items_stack.last_mut() {
                            // If no nested list has started inside the current item yet (no insertion index
                            // is recorded for this depth), we can append the block to the current list item
                            if !insertion_indices.contains_key(&depth) {
                                list_item.content.extend(block);
                                continue;
                            }
                        }

                        // Otherwise we need to insert the block after all the nested items
                        // that have been parsed so far
                        items.extend(block);
                    } else {
                        // Not a block we can parse: skip the token so the loop makes progress.
                        self.cursor += 1;
                    }
                }
            }
        }

        items
    }
 638
 639    #[async_recursion]
 640    async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
 641        let (_event, source_range) = self.previous().unwrap();
 642        let source_range = source_range.clone();
 643        let mut nested_depth = 1;
 644
 645        let mut children: Vec<ParsedMarkdownElement> = vec![];
 646
 647        while !self.eof() {
 648            let block = self.parse_block().await;
 649
 650            if let Some(block) = block {
 651                children.extend(block);
 652            } else {
 653                break;
 654            }
 655
 656            if self.eof() {
 657                break;
 658            }
 659
 660            let (current, _source_range) = self.current().unwrap();
 661            match current {
 662                // This is a nested block quote.
 663                // Record that we're in a nested block quote and continue parsing.
 664                // We don't need to advance the cursor since the next
 665                // call to `parse_block` will handle it.
 666                Event::Start(Tag::BlockQuote(_kind)) => {
 667                    nested_depth += 1;
 668                }
 669                Event::End(TagEnd::BlockQuote(_kind)) => {
 670                    nested_depth -= 1;
 671                    if nested_depth == 0 {
 672                        self.cursor += 1;
 673                        break;
 674                    }
 675                }
 676                _ => {}
 677            };
 678        }
 679
 680        ParsedMarkdownBlockQuote {
 681            source_range,
 682            children,
 683        }
 684    }
 685
 686    async fn parse_code_block(&mut self, language: Option<String>) -> ParsedMarkdownCodeBlock {
 687        let (_event, source_range) = self.previous().unwrap();
 688        let source_range = source_range.clone();
 689        let mut code = String::new();
 690
 691        while !self.eof() {
 692            let (current, _source_range) = self.current().unwrap();
 693            match current {
 694                Event::Text(text) => {
 695                    code.push_str(text);
 696                    self.cursor += 1;
 697                }
 698                Event::End(TagEnd::CodeBlock) => {
 699                    self.cursor += 1;
 700                    break;
 701                }
 702                _ => {
 703                    break;
 704                }
 705            }
 706        }
 707
 708        let highlights = if let Some(language) = &language {
 709            if let Some(registry) = &self.language_registry {
 710                let rope: language::Rope = code.as_str().into();
 711                registry
 712                    .language_for_name_or_extension(language)
 713                    .await
 714                    .map(|l| l.highlight_text(&rope, 0..code.len()))
 715                    .ok()
 716            } else {
 717                None
 718            }
 719        } else {
 720            None
 721        };
 722
 723        ParsedMarkdownCodeBlock {
 724            source_range,
 725            contents: code.trim().to_string().into(),
 726            language,
 727            highlights,
 728        }
 729    }
 730}
 731
 732#[cfg(test)]
 733mod tests {
 734    use super::*;
 735
 736    use gpui::BackgroundExecutor;
 737    use language::{tree_sitter_rust, HighlightId, Language, LanguageConfig, LanguageMatcher};
 738    use pretty_assertions::assert_eq;
 739    use ParsedMarkdownListItemType::*;
 740
 741    async fn parse(input: &str) -> ParsedMarkdown {
 742        parse_markdown(input, None, None).await
 743    }
 744
 745    #[gpui::test]
 746    async fn test_headings() {
 747        let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
 748
 749        assert_eq!(
 750            parsed.children,
 751            vec![
 752                h1(text("Heading one", 2..13), 0..14),
 753                h2(text("Heading two", 17..28), 14..29),
 754                h3(text("Heading three", 33..46), 29..46),
 755            ]
 756        );
 757    }
 758
 759    #[gpui::test]
 760    async fn test_newlines_dont_new_paragraphs() {
 761        let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
 762
 763        assert_eq!(
 764            parsed.children,
 765            vec![p("Some text that is bolded and italicized", 0..46)]
 766        );
 767    }
 768
 769    #[gpui::test]
 770    async fn test_heading_with_paragraph() {
 771        let parsed = parse("# Zed\nThe editor").await;
 772
 773        assert_eq!(
 774            parsed.children,
 775            vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
 776        );
 777    }
 778
 779    #[gpui::test]
 780    async fn test_double_newlines_do_new_paragraphs() {
 781        let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
 782
 783        assert_eq!(
 784            parsed.children,
 785            vec![
 786                p("Some text that is bolded", 0..29),
 787                p("and italicized", 31..47),
 788            ]
 789        );
 790    }
 791
 792    #[gpui::test]
 793    async fn test_bold_italic_text() {
 794        let parsed = parse("Some text **that is bolded** and *italicized*").await;
 795
 796        assert_eq!(
 797            parsed.children,
 798            vec![p("Some text that is bolded and italicized", 0..45)]
 799        );
 800    }
 801
 802    #[gpui::test]
 803    async fn test_nested_bold_strikethrough_text() {
 804        let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
 805
 806        assert_eq!(parsed.children.len(), 1);
 807        assert_eq!(
 808            parsed.children[0],
 809            ParsedMarkdownElement::Paragraph(ParsedMarkdownText {
 810                source_range: 0..35,
 811                contents: "Some bostrikethroughld text".to_string(),
 812                highlights: Vec::new(),
 813                region_ranges: Vec::new(),
 814                regions: Vec::new(),
 815            })
 816        );
 817
 818        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 819            text
 820        } else {
 821            panic!("Expected a paragraph");
 822        };
 823        assert_eq!(
 824            paragraph.highlights,
 825            vec![
 826                (
 827                    5..7,
 828                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 829                        weight: FontWeight::BOLD,
 830                        ..Default::default()
 831                    }),
 832                ),
 833                (
 834                    7..20,
 835                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 836                        weight: FontWeight::BOLD,
 837                        strikethrough: true,
 838                        ..Default::default()
 839                    }),
 840                ),
 841                (
 842                    20..22,
 843                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 844                        weight: FontWeight::BOLD,
 845                        ..Default::default()
 846                    }),
 847                ),
 848            ]
 849        );
 850    }
 851
 852    #[gpui::test]
 853    async fn test_raw_links_detection() {
 854        let parsed = parse("Checkout this https://zed.dev link").await;
 855
 856        assert_eq!(
 857            parsed.children,
 858            vec![p("Checkout this https://zed.dev link", 0..34)]
 859        );
 860
 861        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 862            text
 863        } else {
 864            panic!("Expected a paragraph");
 865        };
 866        assert_eq!(
 867            paragraph.highlights,
 868            vec![(
 869                14..29,
 870                MarkdownHighlight::Style(MarkdownHighlightStyle {
 871                    underline: true,
 872                    ..Default::default()
 873                }),
 874            )]
 875        );
 876        assert_eq!(
 877            paragraph.regions,
 878            vec![ParsedRegion {
 879                code: false,
 880                link: Some(Link::Web {
 881                    url: "https://zed.dev".to_string()
 882                }),
 883            }]
 884        );
 885        assert_eq!(paragraph.region_ranges, vec![14..29]);
 886    }
 887
 888    #[gpui::test]
 889    async fn test_header_only_table() {
 890        let markdown = "\
 891| Header 1 | Header 2 |
 892|----------|----------|
 893
 894Some other content
 895";
 896
 897        let expected_table = table(
 898            0..48,
 899            row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
 900            vec![],
 901        );
 902
 903        assert_eq!(
 904            parse(markdown).await.children[0],
 905            ParsedMarkdownElement::Table(expected_table)
 906        );
 907    }
 908
 909    #[gpui::test]
 910    async fn test_basic_table() {
 911        let markdown = "\
 912| Header 1 | Header 2 |
 913|----------|----------|
 914| Cell 1   | Cell 2   |
 915| Cell 3   | Cell 4   |";
 916
 917        let expected_table = table(
 918            0..95,
 919            row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
 920            vec![
 921                row(vec![text("Cell 1", 49..59), text("Cell 2", 60..70)]),
 922                row(vec![text("Cell 3", 73..83), text("Cell 4", 84..94)]),
 923            ],
 924        );
 925
 926        assert_eq!(
 927            parse(markdown).await.children[0],
 928            ParsedMarkdownElement::Table(expected_table)
 929        );
 930    }
 931
 932    #[gpui::test]
 933    async fn test_list_basic() {
 934        let parsed = parse(
 935            "\
 936* Item 1
 937* Item 2
 938* Item 3
 939",
 940        )
 941        .await;
 942
 943        assert_eq!(
 944            parsed.children,
 945            vec![
 946                list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
 947                list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
 948                list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
 949            ],
 950        );
 951    }
 952
 953    #[gpui::test]
 954    async fn test_list_with_tasks() {
 955        let parsed = parse(
 956            "\
 957- [ ] TODO
 958- [x] Checked
 959",
 960        )
 961        .await;
 962
 963        assert_eq!(
 964            parsed.children,
 965            vec![
 966                list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
 967                list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
 968            ],
 969        );
 970    }
 971
 972    #[gpui::test]
 973    async fn test_list_with_indented_task() {
 974        let parsed = parse(
 975            "\
 976- [ ] TODO
 977  - [x] Checked
 978  - Unordered
 979  1. Number 1
 980  1. Number 2
 9811. Number A
 982",
 983        )
 984        .await;
 985
 986        assert_eq!(
 987            parsed.children,
 988            vec![
 989                list_item(0..12, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
 990                list_item(13..26, 2, Task(true, 15..18), vec![p("Checked", 19..26)]),
 991                list_item(29..40, 2, Unordered, vec![p("Unordered", 31..40)]),
 992                list_item(43..54, 2, Ordered(1), vec![p("Number 1", 46..54)]),
 993                list_item(57..68, 2, Ordered(2), vec![p("Number 2", 60..68)]),
 994                list_item(69..80, 1, Ordered(1), vec![p("Number A", 72..80)]),
 995            ],
 996        );
 997    }
 998
 999    #[gpui::test]
1000    async fn test_list_with_linebreak_is_handled_correctly() {
1001        let parsed = parse(
1002            "\
1003- [ ] Task 1
1004
1005- [x] Task 2
1006",
1007        )
1008        .await;
1009
1010        assert_eq!(
1011            parsed.children,
1012            vec![
1013                list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
1014                list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
1015            ],
1016        );
1017    }
1018
1019    #[gpui::test]
1020    async fn test_list_nested() {
1021        let parsed = parse(
1022            "\
1023* Item 1
1024* Item 2
1025* Item 3
1026
10271. Hello
10281. Two
1029   1. Three
10302. Four
10313. Five
1032
1033* First
1034  1. Hello
1035     1. Goodbyte
1036        - Inner
1037        - Inner
1038  2. Goodbyte
1039        - Next item empty
1040        -
1041* Last
1042",
1043        )
1044        .await;
1045
1046        assert_eq!(
1047            parsed.children,
1048            vec![
1049                list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1050                list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1051                list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
1052                list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
1053                list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
1054                list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
1055                list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
1056                list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
1057                list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
1058                list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
1059                list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
1060                list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
1061                list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
1062                list_item(143..159, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
1063                list_item(160..180, 3, Unordered, vec![p("Next item empty", 165..180)]),
1064                list_item(186..190, 3, Unordered, vec![]),
1065                list_item(191..197, 1, Unordered, vec![p("Last", 193..197)]),
1066            ]
1067        );
1068    }
1069
1070    #[gpui::test]
1071    async fn test_list_with_nested_content() {
1072        let parsed = parse(
1073            "\
1074*   This is a list item with two paragraphs.
1075
1076    This is the second paragraph in the list item.
1077",
1078        )
1079        .await;
1080
1081        assert_eq!(
1082            parsed.children,
1083            vec![list_item(
1084                0..96,
1085                1,
1086                Unordered,
1087                vec![
1088                    p("This is a list item with two paragraphs.", 4..44),
1089                    p("This is the second paragraph in the list item.", 50..97)
1090                ],
1091            ),],
1092        );
1093    }
1094
1095    #[gpui::test]
1096    async fn test_nested_list_with_paragraph_inside() {
1097        let parsed = parse(
1098            "\
10991. a
1100    1. b
1101        1. c
1102
1103    text
1104
1105    1. d
1106",
1107        )
1108        .await;
1109
1110        assert_eq!(
1111            parsed.children,
1112            vec![
1113                list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
1114                list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
1115                list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
1116                p("text", 32..37),
1117                list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
1118            ],
1119        );
1120    }
1121
1122    #[gpui::test]
1123    async fn test_list_with_leading_text() {
1124        let parsed = parse(
1125            "\
1126* `code`
1127* **bold**
1128* [link](https://example.com)
1129",
1130        )
1131        .await;
1132
1133        assert_eq!(
1134            parsed.children,
1135            vec![
1136                list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
1137                list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
1138                list_item(20..49, 1, Unordered, vec![p("link", 22..49)],)
1139            ],
1140        );
1141    }
1142
1143    #[gpui::test]
1144    async fn test_simple_block_quote() {
1145        let parsed = parse("> Simple block quote with **styled text**").await;
1146
1147        assert_eq!(
1148            parsed.children,
1149            vec![block_quote(
1150                vec![p("Simple block quote with styled text", 2..41)],
1151                0..41
1152            )]
1153        );
1154    }
1155
1156    #[gpui::test]
1157    async fn test_simple_block_quote_with_multiple_lines() {
1158        let parsed = parse(
1159            "\
1160> # Heading
1161> More
1162> text
1163>
1164> More text
1165",
1166        )
1167        .await;
1168
1169        assert_eq!(
1170            parsed.children,
1171            vec![block_quote(
1172                vec![
1173                    h1(text("Heading", 4..11), 2..12),
1174                    p("More text", 14..26),
1175                    p("More text", 30..40)
1176                ],
1177                0..40
1178            )]
1179        );
1180    }
1181
1182    #[gpui::test]
1183    async fn test_nested_block_quote() {
1184        let parsed = parse(
1185            "\
1186> A
1187>
1188> > # B
1189>
1190> C
1191
1192More text
1193",
1194        )
1195        .await;
1196
1197        assert_eq!(
1198            parsed.children,
1199            vec![
1200                block_quote(
1201                    vec![
1202                        p("A", 2..4),
1203                        block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
1204                        p("C", 18..20)
1205                    ],
1206                    0..20
1207                ),
1208                p("More text", 21..31)
1209            ]
1210        );
1211    }
1212
1213    #[gpui::test]
1214    async fn test_code_block() {
1215        let parsed = parse(
1216            "\
1217```
1218fn main() {
1219    return 0;
1220}
1221```
1222",
1223        )
1224        .await;
1225
1226        assert_eq!(
1227            parsed.children,
1228            vec![code_block(
1229                None,
1230                "fn main() {\n    return 0;\n}",
1231                0..35,
1232                None
1233            )]
1234        );
1235    }
1236
1237    #[gpui::test]
1238    async fn test_code_block_with_language(executor: BackgroundExecutor) {
1239        let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
1240        language_registry.add(rust_lang());
1241
1242        let parsed = parse_markdown(
1243            "\
1244```rust
1245fn main() {
1246    return 0;
1247}
1248```
1249",
1250            None,
1251            Some(language_registry),
1252        )
1253        .await;
1254
1255        assert_eq!(
1256            parsed.children,
1257            vec![code_block(
1258                Some("rust".to_string()),
1259                "fn main() {\n    return 0;\n}",
1260                0..39,
1261                Some(vec![])
1262            )]
1263        );
1264    }
1265
1266    fn rust_lang() -> Arc<Language> {
1267        Arc::new(Language::new(
1268            LanguageConfig {
1269                name: "Rust".into(),
1270                matcher: LanguageMatcher {
1271                    path_suffixes: vec!["rs".into()],
1272                    ..Default::default()
1273                },
1274                collapsed_placeholder: " /* ... */ ".to_string(),
1275                ..Default::default()
1276            },
1277            Some(tree_sitter_rust::LANGUAGE.into()),
1278        ))
1279    }
1280
1281    fn h1(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1282        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1283            source_range,
1284            level: HeadingLevel::H1,
1285            contents,
1286        })
1287    }
1288
1289    fn h2(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1290        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1291            source_range,
1292            level: HeadingLevel::H2,
1293            contents,
1294        })
1295    }
1296
1297    fn h3(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1298        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1299            source_range,
1300            level: HeadingLevel::H3,
1301            contents,
1302        })
1303    }
1304
1305    fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
1306        ParsedMarkdownElement::Paragraph(text(contents, source_range))
1307    }
1308
1309    fn text(contents: &str, source_range: Range<usize>) -> ParsedMarkdownText {
1310        ParsedMarkdownText {
1311            highlights: Vec::new(),
1312            region_ranges: Vec::new(),
1313            regions: Vec::new(),
1314            source_range,
1315            contents: contents.to_string(),
1316        }
1317    }
1318
1319    fn block_quote(
1320        children: Vec<ParsedMarkdownElement>,
1321        source_range: Range<usize>,
1322    ) -> ParsedMarkdownElement {
1323        ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
1324            source_range,
1325            children,
1326        })
1327    }
1328
1329    fn code_block(
1330        language: Option<String>,
1331        code: &str,
1332        source_range: Range<usize>,
1333        highlights: Option<Vec<(Range<usize>, HighlightId)>>,
1334    ) -> ParsedMarkdownElement {
1335        ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
1336            source_range,
1337            language,
1338            contents: code.to_string().into(),
1339            highlights,
1340        })
1341    }
1342
1343    fn list_item(
1344        source_range: Range<usize>,
1345        depth: u16,
1346        item_type: ParsedMarkdownListItemType,
1347        content: Vec<ParsedMarkdownElement>,
1348    ) -> ParsedMarkdownElement {
1349        ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
1350            source_range,
1351            item_type,
1352            depth,
1353            content,
1354        })
1355    }
1356
1357    fn table(
1358        source_range: Range<usize>,
1359        header: ParsedMarkdownTableRow,
1360        body: Vec<ParsedMarkdownTableRow>,
1361    ) -> ParsedMarkdownTable {
1362        ParsedMarkdownTable {
1363            column_alignments: Vec::new(),
1364            source_range,
1365            header,
1366            body,
1367        }
1368    }
1369
1370    fn row(children: Vec<ParsedMarkdownText>) -> ParsedMarkdownTableRow {
1371        ParsedMarkdownTableRow { children }
1372    }
1373
1374    impl PartialEq for ParsedMarkdownTable {
1375        fn eq(&self, other: &Self) -> bool {
1376            self.source_range == other.source_range
1377                && self.header == other.header
1378                && self.body == other.body
1379        }
1380    }
1381
1382    impl PartialEq for ParsedMarkdownText {
1383        fn eq(&self, other: &Self) -> bool {
1384            self.source_range == other.source_range && self.contents == other.contents
1385        }
1386    }
1387}