markdown_parser.rs

   1use crate::markdown_elements::*;
   2use async_recursion::async_recursion;
   3use collections::FxHashMap;
   4use gpui::FontWeight;
   5use language::LanguageRegistry;
   6use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
   7use std::{ops::Range, path::PathBuf, sync::Arc};
   8
   9pub async fn parse_markdown(
  10    markdown_input: &str,
  11    file_location_directory: Option<PathBuf>,
  12    language_registry: Option<Arc<LanguageRegistry>>,
  13) -> ParsedMarkdown {
  14    let mut options = Options::all();
  15    options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
  16
  17    let parser = Parser::new_ext(markdown_input, options);
  18    let parser = MarkdownParser::new(
  19        parser.into_offset_iter().collect(),
  20        file_location_directory,
  21        language_registry,
  22    );
  23    let renderer = parser.parse_document().await;
  24    ParsedMarkdown {
  25        children: renderer.parsed,
  26    }
  27}
  28
  29struct MarkdownParser<'a> {
  30    tokens: Vec<(Event<'a>, Range<usize>)>,
  31    /// The current index in the tokens array
  32    cursor: usize,
  33    /// The blocks that we have successfully parsed so far
  34    parsed: Vec<ParsedMarkdownElement>,
  35    file_location_directory: Option<PathBuf>,
  36    language_registry: Option<Arc<LanguageRegistry>>,
  37}
  38
  39impl<'a> MarkdownParser<'a> {
  40    fn new(
  41        tokens: Vec<(Event<'a>, Range<usize>)>,
  42        file_location_directory: Option<PathBuf>,
  43        language_registry: Option<Arc<LanguageRegistry>>,
  44    ) -> Self {
  45        Self {
  46            tokens,
  47            file_location_directory,
  48            language_registry,
  49            cursor: 0,
  50            parsed: vec![],
  51        }
  52    }
  53
  54    fn eof(&self) -> bool {
  55        if self.tokens.is_empty() {
  56            return true;
  57        }
  58        self.cursor >= self.tokens.len() - 1
  59    }
  60
  61    fn peek(&self, steps: usize) -> Option<&(Event, Range<usize>)> {
  62        if self.eof() || (steps + self.cursor) >= self.tokens.len() {
  63            return self.tokens.last();
  64        }
  65        return self.tokens.get(self.cursor + steps);
  66    }
  67
  68    fn previous(&self) -> Option<&(Event, Range<usize>)> {
  69        if self.cursor == 0 || self.cursor > self.tokens.len() {
  70            return None;
  71        }
  72        return self.tokens.get(self.cursor - 1);
  73    }
  74
  75    fn current(&self) -> Option<&(Event, Range<usize>)> {
  76        return self.peek(0);
  77    }
  78
  79    fn current_event(&self) -> Option<&Event> {
  80        return self.current().map(|(event, _)| event);
  81    }
  82
  83    fn is_text_like(event: &Event) -> bool {
  84        match event {
  85            Event::Text(_)
  86            // Represent an inline code block
  87            | Event::Code(_)
  88            | Event::Html(_)
  89            | Event::FootnoteReference(_)
  90            | Event::Start(Tag::Link { link_type: _, dest_url: _, title: _, id: _ })
  91            | Event::Start(Tag::Emphasis)
  92            | Event::Start(Tag::Strong)
  93            | Event::Start(Tag::Strikethrough)
  94            | Event::Start(Tag::Image { link_type: _, dest_url: _, title: _, id: _ }) => {
  95                true
  96            }
  97            _ => false,
  98        }
  99    }
 100
 101    async fn parse_document(mut self) -> Self {
 102        while !self.eof() {
 103            if let Some(block) = self.parse_block().await {
 104                self.parsed.extend(block);
 105            } else {
 106                self.cursor += 1;
 107            }
 108        }
 109        self
 110    }
 111
 112    #[async_recursion]
 113    async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
 114        let (current, source_range) = self.current().unwrap();
 115        let source_range = source_range.clone();
 116        match current {
 117            Event::Start(tag) => match tag {
 118                Tag::Paragraph => {
 119                    self.cursor += 1;
 120                    let text = self.parse_text(false, Some(source_range));
 121                    Some(vec![ParsedMarkdownElement::Paragraph(text)])
 122                }
 123                Tag::Heading {
 124                    level,
 125                    id: _,
 126                    classes: _,
 127                    attrs: _,
 128                } => {
 129                    let level = *level;
 130                    self.cursor += 1;
 131                    let heading = self.parse_heading(level);
 132                    Some(vec![ParsedMarkdownElement::Heading(heading)])
 133                }
 134                Tag::Table(alignment) => {
 135                    let alignment = alignment.clone();
 136                    self.cursor += 1;
 137                    let table = self.parse_table(alignment);
 138                    Some(vec![ParsedMarkdownElement::Table(table)])
 139                }
 140                Tag::List(order) => {
 141                    let order = *order;
 142                    self.cursor += 1;
 143                    let list = self.parse_list(order).await;
 144                    Some(list)
 145                }
 146                Tag::BlockQuote(_kind) => {
 147                    self.cursor += 1;
 148                    let block_quote = self.parse_block_quote().await;
 149                    Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
 150                }
 151                Tag::CodeBlock(kind) => {
 152                    let language = match kind {
 153                        pulldown_cmark::CodeBlockKind::Indented => None,
 154                        pulldown_cmark::CodeBlockKind::Fenced(language) => {
 155                            if language.is_empty() {
 156                                None
 157                            } else {
 158                                Some(language.to_string())
 159                            }
 160                        }
 161                    };
 162
 163                    self.cursor += 1;
 164
 165                    let code_block = self.parse_code_block(language).await;
 166                    Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
 167                }
 168                _ => None,
 169            },
 170            Event::Rule => {
 171                let source_range = source_range.clone();
 172                self.cursor += 1;
 173                Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
 174            }
 175            _ => None,
 176        }
 177    }
 178
 179    fn parse_text(
 180        &mut self,
 181        should_complete_on_soft_break: bool,
 182        source_range: Option<Range<usize>>,
 183    ) -> ParsedMarkdownText {
 184        let source_range = source_range.unwrap_or_else(|| {
 185            self.current()
 186                .map(|(_, range)| range.clone())
 187                .unwrap_or_default()
 188        });
 189
 190        let mut text = String::new();
 191        let mut bold_depth = 0;
 192        let mut italic_depth = 0;
 193        let mut strikethrough_depth = 0;
 194        let mut link: Option<Link> = None;
 195        let mut region_ranges: Vec<Range<usize>> = vec![];
 196        let mut regions: Vec<ParsedRegion> = vec![];
 197        let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
 198
 199        let mut link_urls: Vec<String> = vec![];
 200        let mut link_ranges: Vec<Range<usize>> = vec![];
 201
 202        loop {
 203            if self.eof() {
 204                break;
 205            }
 206
 207            let (current, _source_range) = self.current().unwrap();
 208            let prev_len = text.len();
 209            match current {
 210                Event::SoftBreak => {
 211                    if should_complete_on_soft_break {
 212                        break;
 213                    }
 214
 215                    // `Some text\nSome more text` should be treated as a single line.
 216                    text.push(' ');
 217                }
 218
 219                Event::HardBreak => {
 220                    text.push('\n');
 221                }
 222
 223                Event::Text(t) => {
 224                    text.push_str(t.as_ref());
 225
 226                    let mut style = MarkdownHighlightStyle::default();
 227
 228                    if bold_depth > 0 {
 229                        style.weight = FontWeight::BOLD;
 230                    }
 231
 232                    if italic_depth > 0 {
 233                        style.italic = true;
 234                    }
 235
 236                    if strikethrough_depth > 0 {
 237                        style.strikethrough = true;
 238                    }
 239
 240                    let last_run_len = if let Some(link) = link.clone() {
 241                        region_ranges.push(prev_len..text.len());
 242                        regions.push(ParsedRegion {
 243                            code: false,
 244                            link: Some(link),
 245                        });
 246                        style.underline = true;
 247                        prev_len
 248                    } else {
 249                        // Manually scan for links
 250                        let mut finder = linkify::LinkFinder::new();
 251                        finder.kinds(&[linkify::LinkKind::Url]);
 252                        let mut last_link_len = prev_len;
 253                        for link in finder.links(t) {
 254                            let start = link.start();
 255                            let end = link.end();
 256                            let range = (prev_len + start)..(prev_len + end);
 257                            link_ranges.push(range.clone());
 258                            link_urls.push(link.as_str().to_string());
 259
 260                            // If there is a style before we match a link, we have to add this to the highlighted ranges
 261                            if style != MarkdownHighlightStyle::default()
 262                                && last_link_len < link.start()
 263                            {
 264                                highlights.push((
 265                                    last_link_len..link.start(),
 266                                    MarkdownHighlight::Style(style.clone()),
 267                                ));
 268                            }
 269
 270                            highlights.push((
 271                                range.clone(),
 272                                MarkdownHighlight::Style(MarkdownHighlightStyle {
 273                                    underline: true,
 274                                    ..style
 275                                }),
 276                            ));
 277                            region_ranges.push(range.clone());
 278                            regions.push(ParsedRegion {
 279                                code: false,
 280                                link: Some(Link::Web {
 281                                    url: link.as_str().to_string(),
 282                                }),
 283                            });
 284
 285                            last_link_len = end;
 286                        }
 287                        last_link_len
 288                    };
 289
 290                    if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
 291                        let mut new_highlight = true;
 292                        if let Some((last_range, last_style)) = highlights.last_mut() {
 293                            if last_range.end == last_run_len
 294                                && last_style == &MarkdownHighlight::Style(style.clone())
 295                            {
 296                                last_range.end = text.len();
 297                                new_highlight = false;
 298                            }
 299                        }
 300                        if new_highlight {
 301                            highlights
 302                                .push((last_run_len..text.len(), MarkdownHighlight::Style(style)));
 303                        }
 304                    }
 305                }
 306
 307                // Note: This event means "inline code" and not "code block"
 308                Event::Code(t) => {
 309                    text.push_str(t.as_ref());
 310                    region_ranges.push(prev_len..text.len());
 311
 312                    if link.is_some() {
 313                        highlights.push((
 314                            prev_len..text.len(),
 315                            MarkdownHighlight::Style(MarkdownHighlightStyle {
 316                                underline: true,
 317                                ..Default::default()
 318                            }),
 319                        ));
 320                    }
 321
 322                    regions.push(ParsedRegion {
 323                        code: true,
 324                        link: link.clone(),
 325                    });
 326                }
 327
 328                Event::Start(tag) => match tag {
 329                    Tag::Emphasis => italic_depth += 1,
 330                    Tag::Strong => bold_depth += 1,
 331                    Tag::Strikethrough => strikethrough_depth += 1,
 332                    Tag::Link {
 333                        link_type: _,
 334                        dest_url,
 335                        title: _,
 336                        id: _,
 337                    } => {
 338                        link = Link::identify(
 339                            self.file_location_directory.clone(),
 340                            dest_url.to_string(),
 341                        );
 342                    }
 343                    _ => {
 344                        break;
 345                    }
 346                },
 347
 348                Event::End(tag) => match tag {
 349                    TagEnd::Emphasis => {
 350                        italic_depth -= 1;
 351                    }
 352                    TagEnd::Strong => {
 353                        bold_depth -= 1;
 354                    }
 355                    TagEnd::Strikethrough => {
 356                        strikethrough_depth -= 1;
 357                    }
 358                    TagEnd::Link => {
 359                        link = None;
 360                    }
 361                    TagEnd::Paragraph => {
 362                        self.cursor += 1;
 363                        break;
 364                    }
 365                    _ => {
 366                        break;
 367                    }
 368                },
 369
 370                _ => {
 371                    break;
 372                }
 373            }
 374
 375            self.cursor += 1;
 376        }
 377
 378        ParsedMarkdownText {
 379            source_range,
 380            contents: text,
 381            highlights,
 382            regions,
 383            region_ranges,
 384        }
 385    }
 386
 387    fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
 388        let (_event, source_range) = self.previous().unwrap();
 389        let source_range = source_range.clone();
 390        let text = self.parse_text(true, None);
 391
 392        // Advance past the heading end tag
 393        self.cursor += 1;
 394
 395        ParsedMarkdownHeading {
 396            source_range: source_range.clone(),
 397            level: match level {
 398                pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
 399                pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
 400                pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
 401                pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
 402                pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
 403                pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
 404            },
 405            contents: text,
 406        }
 407    }
 408
 409    fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
 410        let (_event, source_range) = self.previous().unwrap();
 411        let source_range = source_range.clone();
 412        let mut header = ParsedMarkdownTableRow::new();
 413        let mut body = vec![];
 414        let mut current_row = vec![];
 415        let mut in_header = true;
 416        let column_alignments = alignment.iter().map(Self::convert_alignment).collect();
 417
 418        loop {
 419            if self.eof() {
 420                break;
 421            }
 422
 423            let (current, source_range) = self.current().unwrap();
 424            let source_range = source_range.clone();
 425            match current {
 426                Event::Start(Tag::TableHead)
 427                | Event::Start(Tag::TableRow)
 428                | Event::End(TagEnd::TableCell) => {
 429                    self.cursor += 1;
 430                }
 431                Event::Start(Tag::TableCell) => {
 432                    self.cursor += 1;
 433                    let cell_contents = self.parse_text(false, Some(source_range));
 434                    current_row.push(cell_contents);
 435                }
 436                Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
 437                    self.cursor += 1;
 438                    let new_row = std::mem::take(&mut current_row);
 439                    if in_header {
 440                        header.children = new_row;
 441                        in_header = false;
 442                    } else {
 443                        let row = ParsedMarkdownTableRow::with_children(new_row);
 444                        body.push(row);
 445                    }
 446                }
 447                Event::End(TagEnd::Table) => {
 448                    self.cursor += 1;
 449                    break;
 450                }
 451                _ => {
 452                    break;
 453                }
 454            }
 455        }
 456
 457        ParsedMarkdownTable {
 458            source_range,
 459            header,
 460            body,
 461            column_alignments,
 462        }
 463    }
 464
 465    fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
 466        match alignment {
 467            Alignment::None => ParsedMarkdownTableAlignment::None,
 468            Alignment::Left => ParsedMarkdownTableAlignment::Left,
 469            Alignment::Center => ParsedMarkdownTableAlignment::Center,
 470            Alignment::Right => ParsedMarkdownTableAlignment::Right,
 471        }
 472    }
 473
 474    async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
 475        let (_, list_source_range) = self.previous().unwrap();
 476
 477        let mut items = Vec::new();
 478        let mut items_stack = vec![Vec::new()];
 479        let mut depth = 1;
 480        let mut task_item = None;
 481        let mut order = order;
 482        let mut order_stack = Vec::new();
 483
 484        let mut insertion_indices = FxHashMap::default();
 485        let mut source_ranges = FxHashMap::default();
 486        let mut start_item_range = list_source_range.clone();
 487
 488        while !self.eof() {
 489            let (current, source_range) = self.current().unwrap();
 490            match current {
 491                Event::Start(Tag::List(new_order)) => {
 492                    if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
 493                        insertion_indices.insert(depth, items.len());
 494                    }
 495
 496                    // We will use the start of the nested list as the end for the current item's range,
 497                    // because we don't care about the hierarchy of list items
 498                    if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
 499                        e.insert(start_item_range.start..source_range.start);
 500                    }
 501
 502                    order_stack.push(order);
 503                    order = *new_order;
 504                    self.cursor += 1;
 505                    depth += 1;
 506                }
 507                Event::End(TagEnd::List(_)) => {
 508                    order = order_stack.pop().flatten();
 509                    self.cursor += 1;
 510                    depth -= 1;
 511
 512                    if depth == 0 {
 513                        break;
 514                    }
 515                }
 516                Event::Start(Tag::Item) => {
 517                    start_item_range = source_range.clone();
 518
 519                    self.cursor += 1;
 520                    items_stack.push(Vec::new());
 521
 522                    // Check for task list marker (`- [ ]` or `- [x]`)
 523                    if let Some(event) = self.current_event() {
 524                        // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
 525                        if event == &Event::Start(Tag::Paragraph) {
 526                            self.cursor += 1;
 527                        }
 528
 529                        if let Some((Event::TaskListMarker(checked), range)) = self.current() {
 530                            task_item = Some((*checked, range.clone()));
 531                            self.cursor += 1;
 532                        }
 533                    }
 534
 535                    if let Some((event, range)) = self.current() {
 536                        // This is a plain list item.
 537                        // For example `- some text` or `1. [Docs](./docs.md)`
 538                        if MarkdownParser::is_text_like(event) {
 539                            let text = self.parse_text(false, Some(range.clone()));
 540                            let block = ParsedMarkdownElement::Paragraph(text);
 541                            if let Some(content) = items_stack.last_mut() {
 542                                content.push(block);
 543                            }
 544                        } else {
 545                            let block = self.parse_block().await;
 546                            if let Some(block) = block {
 547                                if let Some(content) = items_stack.last_mut() {
 548                                    content.extend(block);
 549                                }
 550                            }
 551                        }
 552                    }
 553
 554                    // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
 555                    if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
 556                        self.cursor += 1;
 557                    }
 558                }
 559                Event::End(TagEnd::Item) => {
 560                    self.cursor += 1;
 561
 562                    let item_type = if let Some((checked, range)) = task_item {
 563                        ParsedMarkdownListItemType::Task(checked, range)
 564                    } else if let Some(order) = order {
 565                        ParsedMarkdownListItemType::Ordered(order)
 566                    } else {
 567                        ParsedMarkdownListItemType::Unordered
 568                    };
 569
 570                    if let Some(current) = order {
 571                        order = Some(current + 1);
 572                    }
 573
 574                    if let Some(content) = items_stack.pop() {
 575                        let source_range = source_ranges
 576                            .remove(&depth)
 577                            .unwrap_or(start_item_range.clone());
 578
 579                        // We need to remove the last character of the source range, because it includes the newline character
 580                        let source_range = source_range.start..source_range.end - 1;
 581                        let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
 582                            source_range,
 583                            content,
 584                            depth,
 585                            item_type,
 586                        });
 587
 588                        if let Some(index) = insertion_indices.get(&depth) {
 589                            items.insert(*index, item);
 590                            insertion_indices.remove(&depth);
 591                        } else {
 592                            items.push(item);
 593                        }
 594                    }
 595
 596                    task_item = None;
 597                }
 598                _ => {
 599                    if depth == 0 {
 600                        break;
 601                    }
 602                    // This can only happen if a list item starts with more then one paragraph,
 603                    // or the list item contains blocks that should be rendered after the nested list items
 604                    let block = self.parse_block().await;
 605                    if let Some(block) = block {
 606                        if let Some(items_stack) = items_stack.last_mut() {
 607                            // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
 608                            if !insertion_indices.contains_key(&depth) {
 609                                items_stack.extend(block);
 610                                continue;
 611                            }
 612                        }
 613
 614                        // Otherwise we need to insert the block after all the nested items
 615                        // that have been parsed so far
 616                        items.extend(block);
 617                    }
 618                }
 619            }
 620        }
 621
 622        items
 623    }
 624
 625    #[async_recursion]
 626    async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
 627        let (_event, source_range) = self.previous().unwrap();
 628        let source_range = source_range.clone();
 629        let mut nested_depth = 1;
 630
 631        let mut children: Vec<ParsedMarkdownElement> = vec![];
 632
 633        while !self.eof() {
 634            let block = self.parse_block().await;
 635
 636            if let Some(block) = block {
 637                children.extend(block);
 638            } else {
 639                break;
 640            }
 641
 642            if self.eof() {
 643                break;
 644            }
 645
 646            let (current, _source_range) = self.current().unwrap();
 647            match current {
 648                // This is a nested block quote.
 649                // Record that we're in a nested block quote and continue parsing.
 650                // We don't need to advance the cursor since the next
 651                // call to `parse_block` will handle it.
 652                Event::Start(Tag::BlockQuote(_kind)) => {
 653                    nested_depth += 1;
 654                }
 655                Event::End(TagEnd::BlockQuote(_kind)) => {
 656                    nested_depth -= 1;
 657                    if nested_depth == 0 {
 658                        self.cursor += 1;
 659                        break;
 660                    }
 661                }
 662                _ => {}
 663            };
 664        }
 665
 666        ParsedMarkdownBlockQuote {
 667            source_range,
 668            children,
 669        }
 670    }
 671
 672    async fn parse_code_block(&mut self, language: Option<String>) -> ParsedMarkdownCodeBlock {
 673        let (_event, source_range) = self.previous().unwrap();
 674        let source_range = source_range.clone();
 675        let mut code = String::new();
 676
 677        while !self.eof() {
 678            let (current, _source_range) = self.current().unwrap();
 679            match current {
 680                Event::Text(text) => {
 681                    code.push_str(text);
 682                    self.cursor += 1;
 683                }
 684                Event::End(TagEnd::CodeBlock) => {
 685                    self.cursor += 1;
 686                    break;
 687                }
 688                _ => {
 689                    break;
 690                }
 691            }
 692        }
 693
 694        let highlights = if let Some(language) = &language {
 695            if let Some(registry) = &self.language_registry {
 696                let rope: language::Rope = code.as_str().into();
 697                registry
 698                    .language_for_name_or_extension(language)
 699                    .await
 700                    .map(|l| l.highlight_text(&rope, 0..code.len()))
 701                    .ok()
 702            } else {
 703                None
 704            }
 705        } else {
 706            None
 707        };
 708
 709        ParsedMarkdownCodeBlock {
 710            source_range,
 711            contents: code.trim().to_string().into(),
 712            language,
 713            highlights,
 714        }
 715    }
 716}
 717
 718#[cfg(test)]
 719mod tests {
 720    use super::*;
 721
 722    use gpui::BackgroundExecutor;
 723    use language::{tree_sitter_rust, HighlightId, Language, LanguageConfig, LanguageMatcher};
 724    use pretty_assertions::assert_eq;
 725
 726    use ParsedMarkdownListItemType::*;
 727
 728    async fn parse(input: &str) -> ParsedMarkdown {
 729        parse_markdown(input, None, None).await
 730    }
 731
 732    #[gpui::test]
 733    async fn test_headings() {
 734        let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
 735
 736        assert_eq!(
 737            parsed.children,
 738            vec![
 739                h1(text("Heading one", 2..13), 0..14),
 740                h2(text("Heading two", 17..28), 14..29),
 741                h3(text("Heading three", 33..46), 29..46),
 742            ]
 743        );
 744    }
 745
 746    #[gpui::test]
 747    async fn test_newlines_dont_new_paragraphs() {
 748        let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
 749
 750        assert_eq!(
 751            parsed.children,
 752            vec![p("Some text that is bolded and italicized", 0..46)]
 753        );
 754    }
 755
 756    #[gpui::test]
 757    async fn test_heading_with_paragraph() {
 758        let parsed = parse("# Zed\nThe editor").await;
 759
 760        assert_eq!(
 761            parsed.children,
 762            vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
 763        );
 764    }
 765
 766    #[gpui::test]
 767    async fn test_double_newlines_do_new_paragraphs() {
 768        let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
 769
 770        assert_eq!(
 771            parsed.children,
 772            vec![
 773                p("Some text that is bolded", 0..29),
 774                p("and italicized", 31..47),
 775            ]
 776        );
 777    }
 778
 779    #[gpui::test]
 780    async fn test_bold_italic_text() {
 781        let parsed = parse("Some text **that is bolded** and *italicized*").await;
 782
 783        assert_eq!(
 784            parsed.children,
 785            vec![p("Some text that is bolded and italicized", 0..45)]
 786        );
 787    }
 788
 789    #[gpui::test]
 790    async fn test_nested_bold_strikethrough_text() {
 791        let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
 792
 793        assert_eq!(parsed.children.len(), 1);
 794        assert_eq!(
 795            parsed.children[0],
 796            ParsedMarkdownElement::Paragraph(ParsedMarkdownText {
 797                source_range: 0..35,
 798                contents: "Some bostrikethroughld text".to_string(),
 799                highlights: Vec::new(),
 800                region_ranges: Vec::new(),
 801                regions: Vec::new(),
 802            })
 803        );
 804
 805        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 806            text
 807        } else {
 808            panic!("Expected a paragraph");
 809        };
 810        assert_eq!(
 811            paragraph.highlights,
 812            vec![
 813                (
 814                    5..7,
 815                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 816                        weight: FontWeight::BOLD,
 817                        ..Default::default()
 818                    }),
 819                ),
 820                (
 821                    7..20,
 822                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 823                        weight: FontWeight::BOLD,
 824                        strikethrough: true,
 825                        ..Default::default()
 826                    }),
 827                ),
 828                (
 829                    20..22,
 830                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 831                        weight: FontWeight::BOLD,
 832                        ..Default::default()
 833                    }),
 834                ),
 835            ]
 836        );
 837    }
 838
 839    #[gpui::test]
 840    async fn test_raw_links_detection() {
 841        let parsed = parse("Checkout this https://zed.dev link").await;
 842
 843        assert_eq!(
 844            parsed.children,
 845            vec![p("Checkout this https://zed.dev link", 0..34)]
 846        );
 847
 848        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 849            text
 850        } else {
 851            panic!("Expected a paragraph");
 852        };
 853        assert_eq!(
 854            paragraph.highlights,
 855            vec![(
 856                14..29,
 857                MarkdownHighlight::Style(MarkdownHighlightStyle {
 858                    underline: true,
 859                    ..Default::default()
 860                }),
 861            )]
 862        );
 863        assert_eq!(
 864            paragraph.regions,
 865            vec![ParsedRegion {
 866                code: false,
 867                link: Some(Link::Web {
 868                    url: "https://zed.dev".to_string()
 869                }),
 870            }]
 871        );
 872        assert_eq!(paragraph.region_ranges, vec![14..29]);
 873    }
 874
 875    #[gpui::test]
 876    async fn test_header_only_table() {
 877        let markdown = "\
 878| Header 1 | Header 2 |
 879|----------|----------|
 880
 881Some other content
 882";
 883
 884        let expected_table = table(
 885            0..48,
 886            row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
 887            vec![],
 888        );
 889
 890        assert_eq!(
 891            parse(markdown).await.children[0],
 892            ParsedMarkdownElement::Table(expected_table)
 893        );
 894    }
 895
 896    #[gpui::test]
 897    async fn test_basic_table() {
 898        let markdown = "\
 899| Header 1 | Header 2 |
 900|----------|----------|
 901| Cell 1   | Cell 2   |
 902| Cell 3   | Cell 4   |";
 903
 904        let expected_table = table(
 905            0..95,
 906            row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
 907            vec![
 908                row(vec![text("Cell 1", 49..59), text("Cell 2", 60..70)]),
 909                row(vec![text("Cell 3", 73..83), text("Cell 4", 84..94)]),
 910            ],
 911        );
 912
 913        assert_eq!(
 914            parse(markdown).await.children[0],
 915            ParsedMarkdownElement::Table(expected_table)
 916        );
 917    }
 918
 919    #[gpui::test]
 920    async fn test_list_basic() {
 921        let parsed = parse(
 922            "\
 923* Item 1
 924* Item 2
 925* Item 3
 926",
 927        )
 928        .await;
 929
 930        assert_eq!(
 931            parsed.children,
 932            vec![
 933                list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
 934                list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
 935                list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
 936            ],
 937        );
 938    }
 939
 940    #[gpui::test]
 941    async fn test_list_with_tasks() {
 942        let parsed = parse(
 943            "\
 944- [ ] TODO
 945- [x] Checked
 946",
 947        )
 948        .await;
 949
 950        assert_eq!(
 951            parsed.children,
 952            vec![
 953                list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
 954                list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
 955            ],
 956        );
 957    }
 958
 959    #[gpui::test]
 960    async fn test_list_with_linebreak_is_handled_correctly() {
 961        let parsed = parse(
 962            "\
 963- [ ] Task 1
 964
 965- [x] Task 2
 966",
 967        )
 968        .await;
 969
 970        assert_eq!(
 971            parsed.children,
 972            vec![
 973                list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
 974                list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
 975            ],
 976        );
 977    }
 978
 979    #[gpui::test]
 980    async fn test_list_nested() {
 981        let parsed = parse(
 982            "\
 983* Item 1
 984* Item 2
 985* Item 3
 986
 9871. Hello
 9881. Two
 989   1. Three
 9902. Four
 9913. Five
 992
 993* First
 994  1. Hello
 995     1. Goodbyte
 996        - Inner
 997        - Inner
 998  2. Goodbyte
 999        - Next item empty
1000        -
1001* Last
1002",
1003        )
1004        .await;
1005
1006        assert_eq!(
1007            parsed.children,
1008            vec![
1009                list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1010                list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1011                list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
1012                list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
1013                list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
1014                list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
1015                list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
1016                list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
1017                list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
1018                list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
1019                list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
1020                list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
1021                list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
1022                list_item(143..159, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
1023                list_item(160..180, 3, Unordered, vec![p("Next item empty", 165..180)]),
1024                list_item(186..190, 3, Unordered, vec![]),
1025                list_item(191..197, 1, Unordered, vec![p("Last", 193..197)]),
1026            ]
1027        );
1028    }
1029
1030    #[gpui::test]
1031    async fn test_list_with_nested_content() {
1032        let parsed = parse(
1033            "\
1034*   This is a list item with two paragraphs.
1035
1036    This is the second paragraph in the list item.
1037",
1038        )
1039        .await;
1040
1041        assert_eq!(
1042            parsed.children,
1043            vec![list_item(
1044                0..96,
1045                1,
1046                Unordered,
1047                vec![
1048                    p("This is a list item with two paragraphs.", 4..44),
1049                    p("This is the second paragraph in the list item.", 50..97)
1050                ],
1051            ),],
1052        );
1053    }
1054
1055    #[gpui::test]
1056    async fn test_nested_list_with_paragraph_inside() {
1057        let parsed = parse(
1058            "\
10591. a
1060    1. b
1061        1. c
1062
1063    text
1064
1065    1. d
1066",
1067        )
1068        .await;
1069
1070        assert_eq!(
1071            parsed.children,
1072            vec![
1073                list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
1074                list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
1075                list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
1076                p("text", 32..37),
1077                list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
1078            ],
1079        );
1080    }
1081
1082    #[gpui::test]
1083    async fn test_list_with_leading_text() {
1084        let parsed = parse(
1085            "\
1086* `code`
1087* **bold**
1088* [link](https://example.com)
1089",
1090        )
1091        .await;
1092
1093        assert_eq!(
1094            parsed.children,
1095            vec![
1096                list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
1097                list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
1098                list_item(20..49, 1, Unordered, vec![p("link", 22..49)],)
1099            ],
1100        );
1101    }
1102
1103    #[gpui::test]
1104    async fn test_simple_block_quote() {
1105        let parsed = parse("> Simple block quote with **styled text**").await;
1106
1107        assert_eq!(
1108            parsed.children,
1109            vec![block_quote(
1110                vec![p("Simple block quote with styled text", 2..41)],
1111                0..41
1112            )]
1113        );
1114    }
1115
1116    #[gpui::test]
1117    async fn test_simple_block_quote_with_multiple_lines() {
1118        let parsed = parse(
1119            "\
1120> # Heading
1121> More
1122> text
1123>
1124> More text
1125",
1126        )
1127        .await;
1128
1129        assert_eq!(
1130            parsed.children,
1131            vec![block_quote(
1132                vec![
1133                    h1(text("Heading", 4..11), 2..12),
1134                    p("More text", 14..26),
1135                    p("More text", 30..40)
1136                ],
1137                0..40
1138            )]
1139        );
1140    }
1141
1142    #[gpui::test]
1143    async fn test_nested_block_quote() {
1144        let parsed = parse(
1145            "\
1146> A
1147>
1148> > # B
1149>
1150> C
1151
1152More text
1153",
1154        )
1155        .await;
1156
1157        assert_eq!(
1158            parsed.children,
1159            vec![
1160                block_quote(
1161                    vec![
1162                        p("A", 2..4),
1163                        block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
1164                        p("C", 18..20)
1165                    ],
1166                    0..20
1167                ),
1168                p("More text", 21..31)
1169            ]
1170        );
1171    }
1172
1173    #[gpui::test]
1174    async fn test_code_block() {
1175        let parsed = parse(
1176            "\
1177```
1178fn main() {
1179    return 0;
1180}
1181```
1182",
1183        )
1184        .await;
1185
1186        assert_eq!(
1187            parsed.children,
1188            vec![code_block(
1189                None,
1190                "fn main() {\n    return 0;\n}",
1191                0..35,
1192                None
1193            )]
1194        );
1195    }
1196
1197    #[gpui::test]
1198    async fn test_code_block_with_language(executor: BackgroundExecutor) {
1199        let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
1200        language_registry.add(rust_lang());
1201
1202        let parsed = parse_markdown(
1203            "\
1204```rust
1205fn main() {
1206    return 0;
1207}
1208```
1209",
1210            None,
1211            Some(language_registry),
1212        )
1213        .await;
1214
1215        assert_eq!(
1216            parsed.children,
1217            vec![code_block(
1218                Some("rust".to_string()),
1219                "fn main() {\n    return 0;\n}",
1220                0..39,
1221                Some(vec![])
1222            )]
1223        );
1224    }
1225
1226    fn rust_lang() -> Arc<Language> {
1227        Arc::new(Language::new(
1228            LanguageConfig {
1229                name: "Rust".into(),
1230                matcher: LanguageMatcher {
1231                    path_suffixes: vec!["rs".into()],
1232                    ..Default::default()
1233                },
1234                collapsed_placeholder: " /* ... */ ".to_string(),
1235                ..Default::default()
1236            },
1237            Some(tree_sitter_rust::LANGUAGE.into()),
1238        ))
1239    }
1240
1241    fn h1(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1242        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1243            source_range,
1244            level: HeadingLevel::H1,
1245            contents,
1246        })
1247    }
1248
1249    fn h2(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1250        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1251            source_range,
1252            level: HeadingLevel::H2,
1253            contents,
1254        })
1255    }
1256
1257    fn h3(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1258        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1259            source_range,
1260            level: HeadingLevel::H3,
1261            contents,
1262        })
1263    }
1264
1265    fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
1266        ParsedMarkdownElement::Paragraph(text(contents, source_range))
1267    }
1268
1269    fn text(contents: &str, source_range: Range<usize>) -> ParsedMarkdownText {
1270        ParsedMarkdownText {
1271            highlights: Vec::new(),
1272            region_ranges: Vec::new(),
1273            regions: Vec::new(),
1274            source_range,
1275            contents: contents.to_string(),
1276        }
1277    }
1278
1279    fn block_quote(
1280        children: Vec<ParsedMarkdownElement>,
1281        source_range: Range<usize>,
1282    ) -> ParsedMarkdownElement {
1283        ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
1284            source_range,
1285            children,
1286        })
1287    }
1288
1289    fn code_block(
1290        language: Option<String>,
1291        code: &str,
1292        source_range: Range<usize>,
1293        highlights: Option<Vec<(Range<usize>, HighlightId)>>,
1294    ) -> ParsedMarkdownElement {
1295        ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
1296            source_range,
1297            language,
1298            contents: code.to_string().into(),
1299            highlights,
1300        })
1301    }
1302
1303    fn list_item(
1304        source_range: Range<usize>,
1305        depth: u16,
1306        item_type: ParsedMarkdownListItemType,
1307        content: Vec<ParsedMarkdownElement>,
1308    ) -> ParsedMarkdownElement {
1309        ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
1310            source_range,
1311            item_type,
1312            depth,
1313            content,
1314        })
1315    }
1316
1317    fn table(
1318        source_range: Range<usize>,
1319        header: ParsedMarkdownTableRow,
1320        body: Vec<ParsedMarkdownTableRow>,
1321    ) -> ParsedMarkdownTable {
1322        ParsedMarkdownTable {
1323            column_alignments: Vec::new(),
1324            source_range,
1325            header,
1326            body,
1327        }
1328    }
1329
1330    fn row(children: Vec<ParsedMarkdownText>) -> ParsedMarkdownTableRow {
1331        ParsedMarkdownTableRow { children }
1332    }
1333
1334    impl PartialEq for ParsedMarkdownTable {
1335        fn eq(&self, other: &Self) -> bool {
1336            self.source_range == other.source_range
1337                && self.header == other.header
1338                && self.body == other.body
1339        }
1340    }
1341
1342    impl PartialEq for ParsedMarkdownText {
1343        fn eq(&self, other: &Self) -> bool {
1344            self.source_range == other.source_range && self.contents == other.contents
1345        }
1346    }
1347}