markdown_parser.rs

   1use crate::markdown_elements::*;
   2use async_recursion::async_recursion;
   3use collections::FxHashMap;
   4use gpui::FontWeight;
   5use language::LanguageRegistry;
   6use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
   7use std::{ops::Range, path::PathBuf, sync::Arc};
   8
   9pub async fn parse_markdown(
  10    markdown_input: &str,
  11    file_location_directory: Option<PathBuf>,
  12    language_registry: Option<Arc<LanguageRegistry>>,
  13) -> ParsedMarkdown {
  14    let options = Options::all();
  15    let parser = Parser::new_ext(markdown_input, options);
  16    let parser = MarkdownParser::new(
  17        parser.into_offset_iter().collect(),
  18        file_location_directory,
  19        language_registry,
  20    );
  21    let renderer = parser.parse_document().await;
  22    ParsedMarkdown {
  23        children: renderer.parsed,
  24    }
  25}
  26
  27struct MarkdownParser<'a> {
  28    tokens: Vec<(Event<'a>, Range<usize>)>,
  29    /// The current index in the tokens array
  30    cursor: usize,
  31    /// The blocks that we have successfully parsed so far
  32    parsed: Vec<ParsedMarkdownElement>,
  33    file_location_directory: Option<PathBuf>,
  34    language_registry: Option<Arc<LanguageRegistry>>,
  35}
  36
  37impl<'a> MarkdownParser<'a> {
  38    fn new(
  39        tokens: Vec<(Event<'a>, Range<usize>)>,
  40        file_location_directory: Option<PathBuf>,
  41        language_registry: Option<Arc<LanguageRegistry>>,
  42    ) -> Self {
  43        Self {
  44            tokens,
  45            file_location_directory,
  46            language_registry,
  47            cursor: 0,
  48            parsed: vec![],
  49        }
  50    }
  51
  52    fn eof(&self) -> bool {
  53        if self.tokens.is_empty() {
  54            return true;
  55        }
  56        self.cursor >= self.tokens.len() - 1
  57    }
  58
  59    fn peek(&self, steps: usize) -> Option<&(Event, Range<usize>)> {
  60        if self.eof() || (steps + self.cursor) >= self.tokens.len() {
  61            return self.tokens.last();
  62        }
  63        return self.tokens.get(self.cursor + steps);
  64    }
  65
  66    fn previous(&self) -> Option<&(Event, Range<usize>)> {
  67        if self.cursor == 0 || self.cursor > self.tokens.len() {
  68            return None;
  69        }
  70        return self.tokens.get(self.cursor - 1);
  71    }
  72
  73    fn current(&self) -> Option<&(Event, Range<usize>)> {
  74        return self.peek(0);
  75    }
  76
  77    fn current_event(&self) -> Option<&Event> {
  78        return self.current().map(|(event, _)| event);
  79    }
  80
  81    fn is_text_like(event: &Event) -> bool {
  82        match event {
  83            Event::Text(_)
  84            // Represent an inline code block
  85            | Event::Code(_)
  86            | Event::Html(_)
  87            | Event::FootnoteReference(_)
  88            | Event::Start(Tag::Link { link_type: _, dest_url: _, title: _, id: _ })
  89            | Event::Start(Tag::Emphasis)
  90            | Event::Start(Tag::Strong)
  91            | Event::Start(Tag::Strikethrough)
  92            | Event::Start(Tag::Image { link_type: _, dest_url: _, title: _, id: _ }) => {
  93                return true;
  94            }
  95            _ => return false,
  96        }
  97    }
  98
  99    async fn parse_document(mut self) -> Self {
 100        while !self.eof() {
 101            if let Some(block) = self.parse_block().await {
 102                self.parsed.extend(block);
 103            }
 104        }
 105        self
 106    }
 107
 108    #[async_recursion]
 109    async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
 110        let (current, source_range) = self.current().unwrap();
 111        let source_range = source_range.clone();
 112        match current {
 113            Event::Start(tag) => match tag {
 114                Tag::Paragraph => {
 115                    self.cursor += 1;
 116                    let text = self.parse_text(false, Some(source_range));
 117                    Some(vec![ParsedMarkdownElement::Paragraph(text)])
 118                }
 119                Tag::Heading {
 120                    level,
 121                    id: _,
 122                    classes: _,
 123                    attrs: _,
 124                } => {
 125                    let level = *level;
 126                    self.cursor += 1;
 127                    let heading = self.parse_heading(level);
 128                    Some(vec![ParsedMarkdownElement::Heading(heading)])
 129                }
 130                Tag::Table(alignment) => {
 131                    let alignment = alignment.clone();
 132                    self.cursor += 1;
 133                    let table = self.parse_table(alignment);
 134                    Some(vec![ParsedMarkdownElement::Table(table)])
 135                }
 136                Tag::List(order) => {
 137                    let order = *order;
 138                    self.cursor += 1;
 139                    let list = self.parse_list(order).await;
 140                    Some(list)
 141                }
 142                Tag::BlockQuote => {
 143                    self.cursor += 1;
 144                    let block_quote = self.parse_block_quote().await;
 145                    Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
 146                }
 147                Tag::CodeBlock(kind) => {
 148                    let language = match kind {
 149                        pulldown_cmark::CodeBlockKind::Indented => None,
 150                        pulldown_cmark::CodeBlockKind::Fenced(language) => {
 151                            if language.is_empty() {
 152                                None
 153                            } else {
 154                                Some(language.to_string())
 155                            }
 156                        }
 157                    };
 158
 159                    self.cursor += 1;
 160
 161                    let code_block = self.parse_code_block(language).await;
 162                    Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
 163                }
 164                _ => {
 165                    self.cursor += 1;
 166                    None
 167                }
 168            },
 169            Event::Rule => {
 170                let source_range = source_range.clone();
 171                self.cursor += 1;
 172                Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
 173            }
 174            _ => {
 175                self.cursor += 1;
 176                None
 177            }
 178        }
 179    }
 180
 181    fn parse_text(
 182        &mut self,
 183        should_complete_on_soft_break: bool,
 184        source_range: Option<Range<usize>>,
 185    ) -> ParsedMarkdownText {
 186        let source_range = source_range.unwrap_or_else(|| {
 187            self.current()
 188                .map(|(_, range)| range.clone())
 189                .unwrap_or_default()
 190        });
 191
 192        let mut text = String::new();
 193        let mut bold_depth = 0;
 194        let mut italic_depth = 0;
 195        let mut strikethrough_depth = 0;
 196        let mut link: Option<Link> = None;
 197        let mut region_ranges: Vec<Range<usize>> = vec![];
 198        let mut regions: Vec<ParsedRegion> = vec![];
 199        let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
 200
 201        let mut link_urls: Vec<String> = vec![];
 202        let mut link_ranges: Vec<Range<usize>> = vec![];
 203
 204        loop {
 205            if self.eof() {
 206                break;
 207            }
 208
 209            let (current, _source_range) = self.current().unwrap();
 210            let prev_len = text.len();
 211            match current {
 212                Event::SoftBreak => {
 213                    if should_complete_on_soft_break {
 214                        break;
 215                    }
 216
 217                    // `Some text\nSome more text` should be treated as a single line.
 218                    text.push(' ');
 219                }
 220
 221                Event::HardBreak => {
 222                    text.push('\n');
 223                }
 224
 225                Event::Text(t) => {
 226                    text.push_str(t.as_ref());
 227
 228                    let mut style = MarkdownHighlightStyle::default();
 229
 230                    if bold_depth > 0 {
 231                        style.weight = FontWeight::BOLD;
 232                    }
 233
 234                    if italic_depth > 0 {
 235                        style.italic = true;
 236                    }
 237
 238                    if strikethrough_depth > 0 {
 239                        style.strikethrough = true;
 240                    }
 241
 242                    let last_run_len = if let Some(link) = link.clone() {
 243                        region_ranges.push(prev_len..text.len());
 244                        regions.push(ParsedRegion {
 245                            code: false,
 246                            link: Some(link),
 247                        });
 248                        style.underline = true;
 249                        prev_len
 250                    } else {
 251                        // Manually scan for links
 252                        let mut finder = linkify::LinkFinder::new();
 253                        finder.kinds(&[linkify::LinkKind::Url]);
 254                        let mut last_link_len = prev_len;
 255                        for link in finder.links(&t) {
 256                            let start = link.start();
 257                            let end = link.end();
 258                            let range = (prev_len + start)..(prev_len + end);
 259                            link_ranges.push(range.clone());
 260                            link_urls.push(link.as_str().to_string());
 261
 262                            // If there is a style before we match a link, we have to add this to the highlighted ranges
 263                            if style != MarkdownHighlightStyle::default()
 264                                && last_link_len < link.start()
 265                            {
 266                                highlights.push((
 267                                    last_link_len..link.start(),
 268                                    MarkdownHighlight::Style(style.clone()),
 269                                ));
 270                            }
 271
 272                            highlights.push((
 273                                range.clone(),
 274                                MarkdownHighlight::Style(MarkdownHighlightStyle {
 275                                    underline: true,
 276                                    ..style
 277                                }),
 278                            ));
 279                            region_ranges.push(range.clone());
 280                            regions.push(ParsedRegion {
 281                                code: false,
 282                                link: Some(Link::Web {
 283                                    url: link.as_str().to_string(),
 284                                }),
 285                            });
 286
 287                            last_link_len = end;
 288                        }
 289                        last_link_len
 290                    };
 291
 292                    if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
 293                        let mut new_highlight = true;
 294                        if let Some((last_range, last_style)) = highlights.last_mut() {
 295                            if last_range.end == last_run_len
 296                                && last_style == &MarkdownHighlight::Style(style.clone())
 297                            {
 298                                last_range.end = text.len();
 299                                new_highlight = false;
 300                            }
 301                        }
 302                        if new_highlight {
 303                            highlights
 304                                .push((last_run_len..text.len(), MarkdownHighlight::Style(style)));
 305                        }
 306                    }
 307                }
 308
 309                // Note: This event means "inline code" and not "code block"
 310                Event::Code(t) => {
 311                    text.push_str(t.as_ref());
 312                    region_ranges.push(prev_len..text.len());
 313
 314                    if link.is_some() {
 315                        highlights.push((
 316                            prev_len..text.len(),
 317                            MarkdownHighlight::Style(MarkdownHighlightStyle {
 318                                underline: true,
 319                                ..Default::default()
 320                            }),
 321                        ));
 322                    }
 323
 324                    regions.push(ParsedRegion {
 325                        code: true,
 326                        link: link.clone(),
 327                    });
 328                }
 329
 330                Event::Start(tag) => match tag {
 331                    Tag::Emphasis => italic_depth += 1,
 332                    Tag::Strong => bold_depth += 1,
 333                    Tag::Strikethrough => strikethrough_depth += 1,
 334                    Tag::Link {
 335                        link_type: _,
 336                        dest_url,
 337                        title: _,
 338                        id: _,
 339                    } => {
 340                        link = Link::identify(
 341                            self.file_location_directory.clone(),
 342                            dest_url.to_string(),
 343                        );
 344                    }
 345                    _ => {
 346                        break;
 347                    }
 348                },
 349
 350                Event::End(tag) => match tag {
 351                    TagEnd::Emphasis => {
 352                        italic_depth -= 1;
 353                    }
 354                    TagEnd::Strong => {
 355                        bold_depth -= 1;
 356                    }
 357                    TagEnd::Strikethrough => {
 358                        strikethrough_depth -= 1;
 359                    }
 360                    TagEnd::Link => {
 361                        link = None;
 362                    }
 363                    TagEnd::Paragraph => {
 364                        self.cursor += 1;
 365                        break;
 366                    }
 367                    _ => {
 368                        break;
 369                    }
 370                },
 371
 372                _ => {
 373                    break;
 374                }
 375            }
 376
 377            self.cursor += 1;
 378        }
 379
 380        ParsedMarkdownText {
 381            source_range,
 382            contents: text,
 383            highlights,
 384            regions,
 385            region_ranges,
 386        }
 387    }
 388
 389    fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
 390        let (_event, source_range) = self.previous().unwrap();
 391        let source_range = source_range.clone();
 392        let text = self.parse_text(true, None);
 393
 394        // Advance past the heading end tag
 395        self.cursor += 1;
 396
 397        ParsedMarkdownHeading {
 398            source_range: source_range.clone(),
 399            level: match level {
 400                pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
 401                pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
 402                pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
 403                pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
 404                pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
 405                pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
 406            },
 407            contents: text,
 408        }
 409    }
 410
 411    fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
 412        let (_event, source_range) = self.previous().unwrap();
 413        let source_range = source_range.clone();
 414        let mut header = ParsedMarkdownTableRow::new();
 415        let mut body = vec![];
 416        let mut current_row = vec![];
 417        let mut in_header = true;
 418        let column_alignments = alignment
 419            .iter()
 420            .map(|a| Self::convert_alignment(a))
 421            .collect();
 422
 423        loop {
 424            if self.eof() {
 425                break;
 426            }
 427
 428            let (current, source_range) = self.current().unwrap();
 429            let source_range = source_range.clone();
 430            match current {
 431                Event::Start(Tag::TableHead)
 432                | Event::Start(Tag::TableRow)
 433                | Event::End(TagEnd::TableCell) => {
 434                    self.cursor += 1;
 435                }
 436                Event::Start(Tag::TableCell) => {
 437                    self.cursor += 1;
 438                    let cell_contents = self.parse_text(false, Some(source_range));
 439                    current_row.push(cell_contents);
 440                }
 441                Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
 442                    self.cursor += 1;
 443                    let new_row = std::mem::replace(&mut current_row, vec![]);
 444                    if in_header {
 445                        header.children = new_row;
 446                        in_header = false;
 447                    } else {
 448                        let row = ParsedMarkdownTableRow::with_children(new_row);
 449                        body.push(row);
 450                    }
 451                }
 452                Event::End(TagEnd::Table) => {
 453                    self.cursor += 1;
 454                    break;
 455                }
 456                _ => {
 457                    break;
 458                }
 459            }
 460        }
 461
 462        ParsedMarkdownTable {
 463            source_range,
 464            header,
 465            body,
 466            column_alignments,
 467        }
 468    }
 469
 470    fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
 471        match alignment {
 472            Alignment::None => ParsedMarkdownTableAlignment::None,
 473            Alignment::Left => ParsedMarkdownTableAlignment::Left,
 474            Alignment::Center => ParsedMarkdownTableAlignment::Center,
 475            Alignment::Right => ParsedMarkdownTableAlignment::Right,
 476        }
 477    }
 478
 479    async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
 480        let (_, list_source_range) = self.previous().unwrap();
 481
 482        let mut items = Vec::new();
 483        let mut items_stack = vec![Vec::new()];
 484        let mut depth = 1;
 485        let mut task_item = None;
 486        let mut order = order;
 487        let mut order_stack = Vec::new();
 488
 489        let mut insertion_indices = FxHashMap::default();
 490        let mut source_ranges = FxHashMap::default();
 491        let mut start_item_range = list_source_range.clone();
 492
 493        while !self.eof() {
 494            let (current, source_range) = self.current().unwrap();
 495            match current {
 496                Event::Start(Tag::List(new_order)) => {
 497                    if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
 498                        insertion_indices.insert(depth, items.len());
 499                    }
 500
 501                    // We will use the start of the nested list as the end for the current item's range,
 502                    // because we don't care about the hierarchy of list items
 503                    if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
 504                        e.insert(start_item_range.start..source_range.start);
 505                    }
 506
 507                    order_stack.push(order);
 508                    order = *new_order;
 509                    self.cursor += 1;
 510                    depth += 1;
 511                }
 512                Event::End(TagEnd::List(_)) => {
 513                    order = order_stack.pop().flatten();
 514                    self.cursor += 1;
 515                    depth -= 1;
 516
 517                    if depth == 0 {
 518                        break;
 519                    }
 520                }
 521                Event::Start(Tag::Item) => {
 522                    start_item_range = source_range.clone();
 523
 524                    self.cursor += 1;
 525                    items_stack.push(Vec::new());
 526
 527                    // Check for task list marker (`- [ ]` or `- [x]`)
 528                    if let Some(event) = self.current_event() {
 529                        // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
 530                        if event == &Event::Start(Tag::Paragraph) {
 531                            self.cursor += 1;
 532                        }
 533
 534                        if let Some((Event::TaskListMarker(checked), range)) = self.current() {
 535                            task_item = Some((*checked, range.clone()));
 536                            self.cursor += 1;
 537                        }
 538                    }
 539
 540                    if let Some((event, range)) = self.current() {
 541                        // This is a plain list item.
 542                        // For example `- some text` or `1. [Docs](./docs.md)`
 543                        if MarkdownParser::is_text_like(event) {
 544                            let text = self.parse_text(false, Some(range.clone()));
 545                            let block = ParsedMarkdownElement::Paragraph(text);
 546                            if let Some(content) = items_stack.last_mut() {
 547                                content.push(block);
 548                            }
 549                        } else {
 550                            let block = self.parse_block().await;
 551                            if let Some(block) = block {
 552                                if let Some(content) = items_stack.last_mut() {
 553                                    content.extend(block);
 554                                }
 555                            }
 556                        }
 557                    }
 558
 559                    // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
 560                    if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
 561                        self.cursor += 1;
 562                    }
 563                }
 564                Event::End(TagEnd::Item) => {
 565                    self.cursor += 1;
 566
 567                    let item_type = if let Some((checked, range)) = task_item {
 568                        ParsedMarkdownListItemType::Task(checked, range)
 569                    } else if let Some(order) = order {
 570                        ParsedMarkdownListItemType::Ordered(order)
 571                    } else {
 572                        ParsedMarkdownListItemType::Unordered
 573                    };
 574
 575                    if let Some(current) = order {
 576                        order = Some(current + 1);
 577                    }
 578
 579                    if let Some(content) = items_stack.pop() {
 580                        let source_range = source_ranges
 581                            .remove(&depth)
 582                            .unwrap_or(start_item_range.clone());
 583
 584                        // We need to remove the last character of the source range, because it includes the newline character
 585                        let source_range = source_range.start..source_range.end - 1;
 586                        let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
 587                            source_range,
 588                            content,
 589                            depth,
 590                            item_type,
 591                        });
 592
 593                        if let Some(index) = insertion_indices.get(&depth) {
 594                            items.insert(*index, item);
 595                            insertion_indices.remove(&depth);
 596                        } else {
 597                            items.push(item);
 598                        }
 599                    }
 600
 601                    task_item = None;
 602                }
 603                _ => {
 604                    if depth == 0 {
 605                        break;
 606                    }
 607                    // This can only happen if a list item starts with more then one paragraph,
 608                    // or the list item contains blocks that should be rendered after the nested list items
 609                    let block = self.parse_block().await;
 610                    if let Some(block) = block {
 611                        if let Some(items_stack) = items_stack.last_mut() {
 612                            // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
 613                            if !insertion_indices.contains_key(&depth) {
 614                                items_stack.extend(block);
 615                                continue;
 616                            }
 617                        }
 618
 619                        // Otherwise we need to insert the block after all the nested items
 620                        // that have been parsed so far
 621                        items.extend(block);
 622                    }
 623                }
 624            }
 625        }
 626
 627        items
 628    }
 629
 630    #[async_recursion]
 631    async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
 632        let (_event, source_range) = self.previous().unwrap();
 633        let source_range = source_range.clone();
 634        let mut nested_depth = 1;
 635
 636        let mut children: Vec<ParsedMarkdownElement> = vec![];
 637
 638        while !self.eof() {
 639            let block = self.parse_block().await;
 640
 641            if let Some(block) = block {
 642                children.extend(block);
 643            } else {
 644                break;
 645            }
 646
 647            if self.eof() {
 648                break;
 649            }
 650
 651            let (current, _source_range) = self.current().unwrap();
 652            match current {
 653                // This is a nested block quote.
 654                // Record that we're in a nested block quote and continue parsing.
 655                // We don't need to advance the cursor since the next
 656                // call to `parse_block` will handle it.
 657                Event::Start(Tag::BlockQuote) => {
 658                    nested_depth += 1;
 659                }
 660                Event::End(TagEnd::BlockQuote) => {
 661                    nested_depth -= 1;
 662                    if nested_depth == 0 {
 663                        self.cursor += 1;
 664                        break;
 665                    }
 666                }
 667                _ => {}
 668            };
 669        }
 670
 671        ParsedMarkdownBlockQuote {
 672            source_range,
 673            children,
 674        }
 675    }
 676
 677    async fn parse_code_block(&mut self, language: Option<String>) -> ParsedMarkdownCodeBlock {
 678        let (_event, source_range) = self.previous().unwrap();
 679        let source_range = source_range.clone();
 680        let mut code = String::new();
 681
 682        while !self.eof() {
 683            let (current, _source_range) = self.current().unwrap();
 684            match current {
 685                Event::Text(text) => {
 686                    code.push_str(&text);
 687                    self.cursor += 1;
 688                }
 689                Event::End(TagEnd::CodeBlock) => {
 690                    self.cursor += 1;
 691                    break;
 692                }
 693                _ => {
 694                    break;
 695                }
 696            }
 697        }
 698
 699        let highlights = if let Some(language) = &language {
 700            if let Some(registry) = &self.language_registry {
 701                let rope: language::Rope = code.as_str().into();
 702                registry
 703                    .language_for_name_or_extension(language)
 704                    .await
 705                    .map(|l| l.highlight_text(&rope, 0..code.len()))
 706                    .ok()
 707            } else {
 708                None
 709            }
 710        } else {
 711            None
 712        };
 713
 714        ParsedMarkdownCodeBlock {
 715            source_range,
 716            contents: code.trim().to_string().into(),
 717            language,
 718            highlights,
 719        }
 720    }
 721}
 722
 723#[cfg(test)]
 724mod tests {
 725    use super::*;
 726
 727    use gpui::BackgroundExecutor;
 728    use language::{tree_sitter_rust, HighlightId, Language, LanguageConfig, LanguageMatcher};
 729    use pretty_assertions::assert_eq;
 730
 731    use ParsedMarkdownListItemType::*;
 732
 733    async fn parse(input: &str) -> ParsedMarkdown {
 734        parse_markdown(input, None, None).await
 735    }
 736
 737    #[gpui::test]
 738    async fn test_headings() {
 739        let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
 740
 741        assert_eq!(
 742            parsed.children,
 743            vec![
 744                h1(text("Heading one", 2..13), 0..14),
 745                h2(text("Heading two", 17..28), 14..29),
 746                h3(text("Heading three", 33..46), 29..46),
 747            ]
 748        );
 749    }
 750
 751    #[gpui::test]
 752    async fn test_newlines_dont_new_paragraphs() {
 753        let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
 754
 755        assert_eq!(
 756            parsed.children,
 757            vec![p("Some text that is bolded and italicized", 0..46)]
 758        );
 759    }
 760
 761    #[gpui::test]
 762    async fn test_heading_with_paragraph() {
 763        let parsed = parse("# Zed\nThe editor").await;
 764
 765        assert_eq!(
 766            parsed.children,
 767            vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
 768        );
 769    }
 770
 771    #[gpui::test]
 772    async fn test_double_newlines_do_new_paragraphs() {
 773        let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
 774
 775        assert_eq!(
 776            parsed.children,
 777            vec![
 778                p("Some text that is bolded", 0..29),
 779                p("and italicized", 31..47),
 780            ]
 781        );
 782    }
 783
 784    #[gpui::test]
 785    async fn test_bold_italic_text() {
 786        let parsed = parse("Some text **that is bolded** and *italicized*").await;
 787
 788        assert_eq!(
 789            parsed.children,
 790            vec![p("Some text that is bolded and italicized", 0..45)]
 791        );
 792    }
 793
 794    #[gpui::test]
 795    async fn test_nested_bold_strikethrough_text() {
 796        let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
 797
 798        assert_eq!(parsed.children.len(), 1);
 799        assert_eq!(
 800            parsed.children[0],
 801            ParsedMarkdownElement::Paragraph(ParsedMarkdownText {
 802                source_range: 0..35,
 803                contents: "Some bostrikethroughld text".to_string(),
 804                highlights: Vec::new(),
 805                region_ranges: Vec::new(),
 806                regions: Vec::new(),
 807            })
 808        );
 809
 810        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 811            text
 812        } else {
 813            panic!("Expected a paragraph");
 814        };
 815        assert_eq!(
 816            paragraph.highlights,
 817            vec![
 818                (
 819                    5..7,
 820                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 821                        weight: FontWeight::BOLD,
 822                        ..Default::default()
 823                    }),
 824                ),
 825                (
 826                    7..20,
 827                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 828                        weight: FontWeight::BOLD,
 829                        strikethrough: true,
 830                        ..Default::default()
 831                    }),
 832                ),
 833                (
 834                    20..22,
 835                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 836                        weight: FontWeight::BOLD,
 837                        ..Default::default()
 838                    }),
 839                ),
 840            ]
 841        );
 842    }
 843
 844    #[gpui::test]
 845    async fn test_raw_links_detection() {
 846        let parsed = parse("Checkout this https://zed.dev link").await;
 847
 848        assert_eq!(
 849            parsed.children,
 850            vec![p("Checkout this https://zed.dev link", 0..34)]
 851        );
 852
 853        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 854            text
 855        } else {
 856            panic!("Expected a paragraph");
 857        };
 858        assert_eq!(
 859            paragraph.highlights,
 860            vec![(
 861                14..29,
 862                MarkdownHighlight::Style(MarkdownHighlightStyle {
 863                    underline: true,
 864                    ..Default::default()
 865                }),
 866            )]
 867        );
 868        assert_eq!(
 869            paragraph.regions,
 870            vec![ParsedRegion {
 871                code: false,
 872                link: Some(Link::Web {
 873                    url: "https://zed.dev".to_string()
 874                }),
 875            }]
 876        );
 877        assert_eq!(paragraph.region_ranges, vec![14..29]);
 878    }
 879
 880    #[gpui::test]
 881    async fn test_header_only_table() {
 882        let markdown = "\
 883| Header 1 | Header 2 |
 884|----------|----------|
 885
 886Some other content
 887";
 888
 889        let expected_table = table(
 890            0..48,
 891            row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
 892            vec![],
 893        );
 894
 895        assert_eq!(
 896            parse(markdown).await.children[0],
 897            ParsedMarkdownElement::Table(expected_table)
 898        );
 899    }
 900
 901    #[gpui::test]
 902    async fn test_basic_table() {
 903        let markdown = "\
 904| Header 1 | Header 2 |
 905|----------|----------|
 906| Cell 1   | Cell 2   |
 907| Cell 3   | Cell 4   |";
 908
 909        let expected_table = table(
 910            0..95,
 911            row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
 912            vec![
 913                row(vec![text("Cell 1", 49..59), text("Cell 2", 60..70)]),
 914                row(vec![text("Cell 3", 73..83), text("Cell 4", 84..94)]),
 915            ],
 916        );
 917
 918        assert_eq!(
 919            parse(markdown).await.children[0],
 920            ParsedMarkdownElement::Table(expected_table)
 921        );
 922    }
 923
 924    #[gpui::test]
 925    async fn test_list_basic() {
 926        let parsed = parse(
 927            "\
 928* Item 1
 929* Item 2
 930* Item 3
 931",
 932        )
 933        .await;
 934
 935        assert_eq!(
 936            parsed.children,
 937            vec![
 938                list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
 939                list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
 940                list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
 941            ],
 942        );
 943    }
 944
 945    #[gpui::test]
 946    async fn test_list_with_tasks() {
 947        let parsed = parse(
 948            "\
 949- [ ] TODO
 950- [x] Checked
 951",
 952        )
 953        .await;
 954
 955        assert_eq!(
 956            parsed.children,
 957            vec![
 958                list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
 959                list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
 960            ],
 961        );
 962    }
 963
 964    #[gpui::test]
 965    async fn test_list_with_linebreak_is_handled_correctly() {
 966        let parsed = parse(
 967            "\
 968- [ ] Task 1
 969
 970- [x] Task 2
 971",
 972        )
 973        .await;
 974
 975        assert_eq!(
 976            parsed.children,
 977            vec![
 978                list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
 979                list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
 980            ],
 981        );
 982    }
 983
 984    #[gpui::test]
 985    async fn test_list_nested() {
 986        let parsed = parse(
 987            "\
 988* Item 1
 989* Item 2
 990* Item 3
 991
 9921. Hello
 9931. Two
 994   1. Three
 9952. Four
 9963. Five
 997
 998* First
 999  1. Hello
1000     1. Goodbyte
1001        - Inner
1002        - Inner
1003  2. Goodbyte
1004* Last
1005",
1006        )
1007        .await;
1008
1009        assert_eq!(
1010            parsed.children,
1011            vec![
1012                list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1013                list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1014                list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
1015                list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
1016                list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
1017                list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
1018                list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
1019                list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
1020                list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
1021                list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
1022                list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
1023                list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
1024                list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
1025                list_item(143..154, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
1026                list_item(155..161, 1, Unordered, vec![p("Last", 157..161)]),
1027            ]
1028        );
1029    }
1030
1031    #[gpui::test]
1032    async fn test_list_with_nested_content() {
1033        let parsed = parse(
1034            "\
1035*   This is a list item with two paragraphs.
1036
1037    This is the second paragraph in the list item.
1038",
1039        )
1040        .await;
1041
1042        assert_eq!(
1043            parsed.children,
1044            vec![list_item(
1045                0..96,
1046                1,
1047                Unordered,
1048                vec![
1049                    p("This is a list item with two paragraphs.", 4..44),
1050                    p("This is the second paragraph in the list item.", 50..97)
1051                ],
1052            ),],
1053        );
1054    }
1055
1056    #[gpui::test]
1057    async fn test_nested_list_with_paragraph_inside() {
1058        let parsed = parse(
1059            "\
10601. a
1061    1. b
1062        1. c
1063
1064    text
1065
1066    1. d
1067",
1068        )
1069        .await;
1070
1071        assert_eq!(
1072            parsed.children,
1073            vec![
1074                list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
1075                list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
1076                list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
1077                p("text", 32..37),
1078                list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
1079            ],
1080        );
1081    }
1082
1083    #[gpui::test]
1084    async fn test_list_with_leading_text() {
1085        let parsed = parse(
1086            "\
1087* `code`
1088* **bold**
1089* [link](https://example.com)
1090",
1091        )
1092        .await;
1093
1094        assert_eq!(
1095            parsed.children,
1096            vec![
1097                list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
1098                list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
1099                list_item(20..49, 1, Unordered, vec![p("link", 22..49)],)
1100            ],
1101        );
1102    }
1103
1104    #[gpui::test]
1105    async fn test_simple_block_quote() {
1106        let parsed = parse("> Simple block quote with **styled text**").await;
1107
1108        assert_eq!(
1109            parsed.children,
1110            vec![block_quote(
1111                vec![p("Simple block quote with styled text", 2..41)],
1112                0..41
1113            )]
1114        );
1115    }
1116
1117    #[gpui::test]
1118    async fn test_simple_block_quote_with_multiple_lines() {
1119        let parsed = parse(
1120            "\
1121> # Heading
1122> More
1123> text
1124>
1125> More text
1126",
1127        )
1128        .await;
1129
1130        assert_eq!(
1131            parsed.children,
1132            vec![block_quote(
1133                vec![
1134                    h1(text("Heading", 4..11), 2..12),
1135                    p("More text", 14..26),
1136                    p("More text", 30..40)
1137                ],
1138                0..40
1139            )]
1140        );
1141    }
1142
1143    #[gpui::test]
1144    async fn test_nested_block_quote() {
1145        let parsed = parse(
1146            "\
1147> A
1148>
1149> > # B
1150>
1151> C
1152
1153More text
1154",
1155        )
1156        .await;
1157
1158        assert_eq!(
1159            parsed.children,
1160            vec![
1161                block_quote(
1162                    vec![
1163                        p("A", 2..4),
1164                        block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
1165                        p("C", 18..20)
1166                    ],
1167                    0..20
1168                ),
1169                p("More text", 21..31)
1170            ]
1171        );
1172    }
1173
1174    #[gpui::test]
1175    async fn test_code_block() {
1176        let parsed = parse(
1177            "\
1178```
1179fn main() {
1180    return 0;
1181}
1182```
1183",
1184        )
1185        .await;
1186
1187        assert_eq!(
1188            parsed.children,
1189            vec![code_block(
1190                None,
1191                "fn main() {\n    return 0;\n}",
1192                0..35,
1193                None
1194            )]
1195        );
1196    }
1197
1198    #[gpui::test]
1199    async fn test_code_block_with_language(executor: BackgroundExecutor) {
1200        let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
1201        language_registry.add(rust_lang());
1202
1203        let parsed = parse_markdown(
1204            "\
1205```rust
1206fn main() {
1207    return 0;
1208}
1209```
1210",
1211            None,
1212            Some(language_registry),
1213        )
1214        .await;
1215
1216        assert_eq!(
1217            parsed.children,
1218            vec![code_block(
1219                Some("rust".to_string()),
1220                "fn main() {\n    return 0;\n}",
1221                0..39,
1222                Some(vec![])
1223            )]
1224        );
1225    }
1226
1227    fn rust_lang() -> Arc<Language> {
1228        Arc::new(Language::new(
1229            LanguageConfig {
1230                name: "Rust".into(),
1231                matcher: LanguageMatcher {
1232                    path_suffixes: vec!["rs".into()],
1233                    ..Default::default()
1234                },
1235                collapsed_placeholder: " /* ... */ ".to_string(),
1236                ..Default::default()
1237            },
1238            Some(tree_sitter_rust::language()),
1239        ))
1240    }
1241
1242    fn h1(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1243        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1244            source_range,
1245            level: HeadingLevel::H1,
1246            contents,
1247        })
1248    }
1249
1250    fn h2(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1251        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1252            source_range,
1253            level: HeadingLevel::H2,
1254            contents,
1255        })
1256    }
1257
1258    fn h3(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1259        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1260            source_range,
1261            level: HeadingLevel::H3,
1262            contents,
1263        })
1264    }
1265
1266    fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
1267        ParsedMarkdownElement::Paragraph(text(contents, source_range))
1268    }
1269
1270    fn text(contents: &str, source_range: Range<usize>) -> ParsedMarkdownText {
1271        ParsedMarkdownText {
1272            highlights: Vec::new(),
1273            region_ranges: Vec::new(),
1274            regions: Vec::new(),
1275            source_range,
1276            contents: contents.to_string(),
1277        }
1278    }
1279
1280    fn block_quote(
1281        children: Vec<ParsedMarkdownElement>,
1282        source_range: Range<usize>,
1283    ) -> ParsedMarkdownElement {
1284        ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
1285            source_range,
1286            children,
1287        })
1288    }
1289
1290    fn code_block(
1291        language: Option<String>,
1292        code: &str,
1293        source_range: Range<usize>,
1294        highlights: Option<Vec<(Range<usize>, HighlightId)>>,
1295    ) -> ParsedMarkdownElement {
1296        ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
1297            source_range,
1298            language,
1299            contents: code.to_string().into(),
1300            highlights,
1301        })
1302    }
1303
1304    fn list_item(
1305        source_range: Range<usize>,
1306        depth: u16,
1307        item_type: ParsedMarkdownListItemType,
1308        content: Vec<ParsedMarkdownElement>,
1309    ) -> ParsedMarkdownElement {
1310        ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
1311            source_range,
1312            item_type,
1313            depth,
1314            content,
1315        })
1316    }
1317
1318    fn table(
1319        source_range: Range<usize>,
1320        header: ParsedMarkdownTableRow,
1321        body: Vec<ParsedMarkdownTableRow>,
1322    ) -> ParsedMarkdownTable {
1323        ParsedMarkdownTable {
1324            column_alignments: Vec::new(),
1325            source_range,
1326            header,
1327            body,
1328        }
1329    }
1330
1331    fn row(children: Vec<ParsedMarkdownText>) -> ParsedMarkdownTableRow {
1332        ParsedMarkdownTableRow { children }
1333    }
1334
1335    impl PartialEq for ParsedMarkdownTable {
1336        fn eq(&self, other: &Self) -> bool {
1337            self.source_range == other.source_range
1338                && self.header == other.header
1339                && self.body == other.body
1340        }
1341    }
1342
1343    impl PartialEq for ParsedMarkdownText {
1344        fn eq(&self, other: &Self) -> bool {
1345            self.source_range == other.source_range && self.contents == other.contents
1346        }
1347    }
1348}