markdown_parser.rs

   1use crate::markdown_elements::*;
   2use async_recursion::async_recursion;
   3use collections::FxHashMap;
   4use gpui::FontWeight;
   5use language::LanguageRegistry;
   6use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
   7use std::{ops::Range, path::PathBuf, sync::Arc, vec};
   8
   9pub async fn parse_markdown(
  10    markdown_input: &str,
  11    file_location_directory: Option<PathBuf>,
  12    language_registry: Option<Arc<LanguageRegistry>>,
  13) -> ParsedMarkdown {
  14    let mut options = Options::all();
  15    options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
  16
  17    let parser = Parser::new_ext(markdown_input, options);
  18    let parser = MarkdownParser::new(
  19        parser.into_offset_iter().collect(),
  20        file_location_directory,
  21        language_registry,
  22    );
  23    let renderer = parser.parse_document().await;
  24    ParsedMarkdown {
  25        children: renderer.parsed,
  26    }
  27}
  28
  29struct MarkdownParser<'a> {
  30    tokens: Vec<(Event<'a>, Range<usize>)>,
  31    /// The current index in the tokens array
  32    cursor: usize,
  33    /// The blocks that we have successfully parsed so far
  34    parsed: Vec<ParsedMarkdownElement>,
  35    file_location_directory: Option<PathBuf>,
  36    language_registry: Option<Arc<LanguageRegistry>>,
  37}
  38
  39struct MarkdownListItem {
  40    content: Vec<ParsedMarkdownElement>,
  41    item_type: ParsedMarkdownListItemType,
  42}
  43
  44impl Default for MarkdownListItem {
  45    fn default() -> Self {
  46        Self {
  47            content: Vec::new(),
  48            item_type: ParsedMarkdownListItemType::Unordered,
  49        }
  50    }
  51}
  52
  53impl<'a> MarkdownParser<'a> {
  54    fn new(
  55        tokens: Vec<(Event<'a>, Range<usize>)>,
  56        file_location_directory: Option<PathBuf>,
  57        language_registry: Option<Arc<LanguageRegistry>>,
  58    ) -> Self {
  59        Self {
  60            tokens,
  61            file_location_directory,
  62            language_registry,
  63            cursor: 0,
  64            parsed: vec![],
  65        }
  66    }
  67
  68    fn eof(&self) -> bool {
  69        if self.tokens.is_empty() {
  70            return true;
  71        }
  72        self.cursor >= self.tokens.len() - 1
  73    }
  74
  75    fn peek(&self, steps: usize) -> Option<&(Event<'_>, Range<usize>)> {
  76        if self.eof() || (steps + self.cursor) >= self.tokens.len() {
  77            return self.tokens.last();
  78        }
  79        return self.tokens.get(self.cursor + steps);
  80    }
  81
  82    fn previous(&self) -> Option<&(Event<'_>, Range<usize>)> {
  83        if self.cursor == 0 || self.cursor > self.tokens.len() {
  84            return None;
  85        }
  86        return self.tokens.get(self.cursor - 1);
  87    }
  88
  89    fn current(&self) -> Option<&(Event<'_>, Range<usize>)> {
  90        return self.peek(0);
  91    }
  92
  93    fn current_event(&self) -> Option<&Event<'_>> {
  94        return self.current().map(|(event, _)| event);
  95    }
  96
  97    fn is_text_like(event: &Event) -> bool {
  98        match event {
  99            Event::Text(_)
 100            // Represent an inline code block
 101            | Event::Code(_)
 102            | Event::Html(_)
 103            | Event::InlineHtml(_)
 104            | Event::FootnoteReference(_)
 105            | Event::Start(Tag::Link { .. })
 106            | Event::Start(Tag::Emphasis)
 107            | Event::Start(Tag::Strong)
 108            | Event::Start(Tag::Strikethrough)
 109            | Event::Start(Tag::Image { .. }) => {
 110                true
 111            }
 112            _ => false,
 113        }
 114    }
 115
 116    async fn parse_document(mut self) -> Self {
 117        while !self.eof() {
 118            if let Some(block) = self.parse_block().await {
 119                self.parsed.extend(block);
 120            } else {
 121                self.cursor += 1;
 122            }
 123        }
 124        self
 125    }
 126
 127    #[async_recursion]
 128    async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
 129        let (current, source_range) = self.current().unwrap();
 130        let source_range = source_range.clone();
 131        match current {
 132            Event::Start(tag) => match tag {
 133                Tag::Paragraph => {
 134                    self.cursor += 1;
 135                    let text = self.parse_text(false, Some(source_range));
 136                    Some(vec![ParsedMarkdownElement::Paragraph(text)])
 137                }
 138                Tag::Heading { level, .. } => {
 139                    let level = *level;
 140                    self.cursor += 1;
 141                    let heading = self.parse_heading(level);
 142                    Some(vec![ParsedMarkdownElement::Heading(heading)])
 143                }
 144                Tag::Table(alignment) => {
 145                    let alignment = alignment.clone();
 146                    self.cursor += 1;
 147                    let table = self.parse_table(alignment);
 148                    Some(vec![ParsedMarkdownElement::Table(table)])
 149                }
 150                Tag::List(order) => {
 151                    let order = *order;
 152                    self.cursor += 1;
 153                    let list = self.parse_list(order).await;
 154                    Some(list)
 155                }
 156                Tag::BlockQuote(_kind) => {
 157                    self.cursor += 1;
 158                    let block_quote = self.parse_block_quote().await;
 159                    Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
 160                }
 161                Tag::CodeBlock(kind) => {
 162                    let language = match kind {
 163                        pulldown_cmark::CodeBlockKind::Indented => None,
 164                        pulldown_cmark::CodeBlockKind::Fenced(language) => {
 165                            if language.is_empty() {
 166                                None
 167                            } else {
 168                                Some(language.to_string())
 169                            }
 170                        }
 171                    };
 172
 173                    self.cursor += 1;
 174
 175                    let code_block = self.parse_code_block(language).await;
 176                    Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
 177                }
 178                _ => None,
 179            },
 180            Event::Rule => {
 181                let source_range = source_range.clone();
 182                self.cursor += 1;
 183                Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
 184            }
 185            _ => None,
 186        }
 187    }
 188
 189    fn parse_text(
 190        &mut self,
 191        should_complete_on_soft_break: bool,
 192        source_range: Option<Range<usize>>,
 193    ) -> MarkdownParagraph {
 194        let source_range = source_range.unwrap_or_else(|| {
 195            self.current()
 196                .map(|(_, range)| range.clone())
 197                .unwrap_or_default()
 198        });
 199
 200        let mut markdown_text_like = Vec::new();
 201        let mut text = String::new();
 202        let mut bold_depth = 0;
 203        let mut italic_depth = 0;
 204        let mut strikethrough_depth = 0;
 205        let mut link: Option<Link> = None;
 206        let mut image: Option<Image> = None;
 207        let mut region_ranges: Vec<Range<usize>> = vec![];
 208        let mut regions: Vec<ParsedRegion> = vec![];
 209        let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
 210        let mut link_urls: Vec<String> = vec![];
 211        let mut link_ranges: Vec<Range<usize>> = vec![];
 212
 213        loop {
 214            if self.eof() {
 215                break;
 216            }
 217
 218            let (current, _) = self.current().unwrap();
 219            let prev_len = text.len();
 220            match current {
 221                Event::SoftBreak => {
 222                    if should_complete_on_soft_break {
 223                        break;
 224                    }
 225                    text.push(' ');
 226                }
 227
 228                Event::HardBreak => {
 229                    text.push('\n');
 230                }
 231
 232                // We want to ignore any inline HTML tags in the text but keep
 233                // the text between them
 234                Event::InlineHtml(_) => {}
 235
 236                Event::Text(t) => {
 237                    text.push_str(t.as_ref());
 238                    let mut style = MarkdownHighlightStyle::default();
 239
 240                    if bold_depth > 0 {
 241                        style.weight = FontWeight::BOLD;
 242                    }
 243
 244                    if italic_depth > 0 {
 245                        style.italic = true;
 246                    }
 247
 248                    if strikethrough_depth > 0 {
 249                        style.strikethrough = true;
 250                    }
 251
 252                    let last_run_len = if let Some(link) = link.clone() {
 253                        region_ranges.push(prev_len..text.len());
 254                        regions.push(ParsedRegion {
 255                            code: false,
 256                            link: Some(link),
 257                        });
 258                        style.underline = true;
 259                        prev_len
 260                    } else {
 261                        // Manually scan for links
 262                        let mut finder = linkify::LinkFinder::new();
 263                        finder.kinds(&[linkify::LinkKind::Url]);
 264                        let mut last_link_len = prev_len;
 265                        for link in finder.links(t) {
 266                            let start = link.start();
 267                            let end = link.end();
 268                            let range = (prev_len + start)..(prev_len + end);
 269                            link_ranges.push(range.clone());
 270                            link_urls.push(link.as_str().to_string());
 271
 272                            // If there is a style before we match a link, we have to add this to the highlighted ranges
 273                            if style != MarkdownHighlightStyle::default()
 274                                && last_link_len < link.start()
 275                            {
 276                                highlights.push((
 277                                    last_link_len..link.start(),
 278                                    MarkdownHighlight::Style(style.clone()),
 279                                ));
 280                            }
 281
 282                            highlights.push((
 283                                range.clone(),
 284                                MarkdownHighlight::Style(MarkdownHighlightStyle {
 285                                    underline: true,
 286                                    ..style
 287                                }),
 288                            ));
 289                            region_ranges.push(range.clone());
 290                            regions.push(ParsedRegion {
 291                                code: false,
 292                                link: Some(Link::Web {
 293                                    url: link.as_str().to_string(),
 294                                }),
 295                            });
 296                            last_link_len = end;
 297                        }
 298                        last_link_len
 299                    };
 300
 301                    if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
 302                        let mut new_highlight = true;
 303                        if let Some((last_range, last_style)) = highlights.last_mut() {
 304                            if last_range.end == last_run_len
 305                                && last_style == &MarkdownHighlight::Style(style.clone())
 306                            {
 307                                last_range.end = text.len();
 308                                new_highlight = false;
 309                            }
 310                        }
 311                        if new_highlight {
 312                            highlights.push((
 313                                last_run_len..text.len(),
 314                                MarkdownHighlight::Style(style.clone()),
 315                            ));
 316                        }
 317                    }
 318                }
 319                Event::Code(t) => {
 320                    text.push_str(t.as_ref());
 321                    region_ranges.push(prev_len..text.len());
 322
 323                    if link.is_some() {
 324                        highlights.push((
 325                            prev_len..text.len(),
 326                            MarkdownHighlight::Style(MarkdownHighlightStyle {
 327                                underline: true,
 328                                ..Default::default()
 329                            }),
 330                        ));
 331                    }
 332                    regions.push(ParsedRegion {
 333                        code: true,
 334                        link: link.clone(),
 335                    });
 336                }
 337                Event::Start(tag) => match tag {
 338                    Tag::Emphasis => italic_depth += 1,
 339                    Tag::Strong => bold_depth += 1,
 340                    Tag::Strikethrough => strikethrough_depth += 1,
 341                    Tag::Link { dest_url, .. } => {
 342                        link = Link::identify(
 343                            self.file_location_directory.clone(),
 344                            dest_url.to_string(),
 345                        );
 346                    }
 347                    Tag::Image { dest_url, .. } => {
 348                        if !text.is_empty() {
 349                            let parsed_regions = MarkdownParagraphChunk::Text(ParsedMarkdownText {
 350                                source_range: source_range.clone(),
 351                                contents: text.clone(),
 352                                highlights: highlights.clone(),
 353                                region_ranges: region_ranges.clone(),
 354                                regions: regions.clone(),
 355                            });
 356                            text = String::new();
 357                            highlights = vec![];
 358                            region_ranges = vec![];
 359                            regions = vec![];
 360                            markdown_text_like.push(parsed_regions);
 361                        }
 362                        image = Image::identify(
 363                            dest_url.to_string(),
 364                            source_range.clone(),
 365                            self.file_location_directory.clone(),
 366                        );
 367                    }
 368                    _ => {
 369                        break;
 370                    }
 371                },
 372
 373                Event::End(tag) => match tag {
 374                    TagEnd::Emphasis => italic_depth -= 1,
 375                    TagEnd::Strong => bold_depth -= 1,
 376                    TagEnd::Strikethrough => strikethrough_depth -= 1,
 377                    TagEnd::Link => {
 378                        link = None;
 379                    }
 380                    TagEnd::Image => {
 381                        if let Some(mut image) = image.take() {
 382                            if !text.is_empty() {
 383                                image.alt_text = Some(std::mem::take(&mut text).into());
 384                            }
 385                            markdown_text_like.push(MarkdownParagraphChunk::Image(image));
 386                        }
 387                    }
 388                    TagEnd::Paragraph => {
 389                        self.cursor += 1;
 390                        break;
 391                    }
 392                    _ => {
 393                        break;
 394                    }
 395                },
 396                _ => {
 397                    break;
 398                }
 399            }
 400
 401            self.cursor += 1;
 402        }
 403        if !text.is_empty() {
 404            markdown_text_like.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
 405                source_range: source_range.clone(),
 406                contents: text,
 407                highlights,
 408                regions,
 409                region_ranges,
 410            }));
 411        }
 412        markdown_text_like
 413    }
 414
 415    fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
 416        let (_event, source_range) = self.previous().unwrap();
 417        let source_range = source_range.clone();
 418        let text = self.parse_text(true, None);
 419
 420        // Advance past the heading end tag
 421        self.cursor += 1;
 422
 423        ParsedMarkdownHeading {
 424            source_range: source_range.clone(),
 425            level: match level {
 426                pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
 427                pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
 428                pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
 429                pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
 430                pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
 431                pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
 432            },
 433            contents: text,
 434        }
 435    }
 436
 437    fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
 438        let (_event, source_range) = self.previous().unwrap();
 439        let source_range = source_range.clone();
 440        let mut header = ParsedMarkdownTableRow::new();
 441        let mut body = vec![];
 442        let mut current_row = vec![];
 443        let mut in_header = true;
 444        let column_alignments = alignment.iter().map(Self::convert_alignment).collect();
 445
 446        loop {
 447            if self.eof() {
 448                break;
 449            }
 450
 451            let (current, source_range) = self.current().unwrap();
 452            let source_range = source_range.clone();
 453            match current {
 454                Event::Start(Tag::TableHead)
 455                | Event::Start(Tag::TableRow)
 456                | Event::End(TagEnd::TableCell) => {
 457                    self.cursor += 1;
 458                }
 459                Event::Start(Tag::TableCell) => {
 460                    self.cursor += 1;
 461                    let cell_contents = self.parse_text(false, Some(source_range));
 462                    current_row.push(cell_contents);
 463                }
 464                Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
 465                    self.cursor += 1;
 466                    let new_row = std::mem::take(&mut current_row);
 467                    if in_header {
 468                        header.children = new_row;
 469                        in_header = false;
 470                    } else {
 471                        let row = ParsedMarkdownTableRow::with_children(new_row);
 472                        body.push(row);
 473                    }
 474                }
 475                Event::End(TagEnd::Table) => {
 476                    self.cursor += 1;
 477                    break;
 478                }
 479                _ => {
 480                    break;
 481                }
 482            }
 483        }
 484
 485        ParsedMarkdownTable {
 486            source_range,
 487            header,
 488            body,
 489            column_alignments,
 490        }
 491    }
 492
 493    fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
 494        match alignment {
 495            Alignment::None => ParsedMarkdownTableAlignment::None,
 496            Alignment::Left => ParsedMarkdownTableAlignment::Left,
 497            Alignment::Center => ParsedMarkdownTableAlignment::Center,
 498            Alignment::Right => ParsedMarkdownTableAlignment::Right,
 499        }
 500    }
 501
 502    async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
 503        let (_, list_source_range) = self.previous().unwrap();
 504
 505        let mut items = Vec::new();
 506        let mut items_stack = vec![MarkdownListItem::default()];
 507        let mut depth = 1;
 508        let mut order = order;
 509        let mut order_stack = Vec::new();
 510
 511        let mut insertion_indices = FxHashMap::default();
 512        let mut source_ranges = FxHashMap::default();
 513        let mut start_item_range = list_source_range.clone();
 514
 515        while !self.eof() {
 516            let (current, source_range) = self.current().unwrap();
 517            match current {
 518                Event::Start(Tag::List(new_order)) => {
 519                    if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
 520                        insertion_indices.insert(depth, items.len());
 521                    }
 522
 523                    // We will use the start of the nested list as the end for the current item's range,
 524                    // because we don't care about the hierarchy of list items
 525                    if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
 526                        e.insert(start_item_range.start..source_range.start);
 527                    }
 528
 529                    order_stack.push(order);
 530                    order = *new_order;
 531                    self.cursor += 1;
 532                    depth += 1;
 533                }
 534                Event::End(TagEnd::List(_)) => {
 535                    order = order_stack.pop().flatten();
 536                    self.cursor += 1;
 537                    depth -= 1;
 538
 539                    if depth == 0 {
 540                        break;
 541                    }
 542                }
 543                Event::Start(Tag::Item) => {
 544                    start_item_range = source_range.clone();
 545
 546                    self.cursor += 1;
 547                    items_stack.push(MarkdownListItem::default());
 548
 549                    let mut task_list = None;
 550                    // Check for task list marker (`- [ ]` or `- [x]`)
 551                    if let Some(event) = self.current_event() {
 552                        // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
 553                        if event == &Event::Start(Tag::Paragraph) {
 554                            self.cursor += 1;
 555                        }
 556
 557                        if let Some((Event::TaskListMarker(checked), range)) = self.current() {
 558                            task_list = Some((*checked, range.clone()));
 559                            self.cursor += 1;
 560                        }
 561                    }
 562
 563                    if let Some((event, range)) = self.current() {
 564                        // This is a plain list item.
 565                        // For example `- some text` or `1. [Docs](./docs.md)`
 566                        if MarkdownParser::is_text_like(event) {
 567                            let text = self.parse_text(false, Some(range.clone()));
 568                            let block = ParsedMarkdownElement::Paragraph(text);
 569                            if let Some(content) = items_stack.last_mut() {
 570                                let item_type = if let Some((checked, range)) = task_list {
 571                                    ParsedMarkdownListItemType::Task(checked, range)
 572                                } else if let Some(order) = order {
 573                                    ParsedMarkdownListItemType::Ordered(order)
 574                                } else {
 575                                    ParsedMarkdownListItemType::Unordered
 576                                };
 577                                content.item_type = item_type;
 578                                content.content.push(block);
 579                            }
 580                        } else {
 581                            let block = self.parse_block().await;
 582                            if let Some(block) = block {
 583                                if let Some(list_item) = items_stack.last_mut() {
 584                                    list_item.content.extend(block);
 585                                }
 586                            }
 587                        }
 588                    }
 589
 590                    // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
 591                    if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
 592                        self.cursor += 1;
 593                    }
 594                }
 595                Event::End(TagEnd::Item) => {
 596                    self.cursor += 1;
 597
 598                    if let Some(current) = order {
 599                        order = Some(current + 1);
 600                    }
 601
 602                    if let Some(list_item) = items_stack.pop() {
 603                        let source_range = source_ranges
 604                            .remove(&depth)
 605                            .unwrap_or(start_item_range.clone());
 606
 607                        // We need to remove the last character of the source range, because it includes the newline character
 608                        let source_range = source_range.start..source_range.end - 1;
 609                        let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
 610                            source_range,
 611                            content: list_item.content,
 612                            depth,
 613                            item_type: list_item.item_type,
 614                        });
 615
 616                        if let Some(index) = insertion_indices.get(&depth) {
 617                            items.insert(*index, item);
 618                            insertion_indices.remove(&depth);
 619                        } else {
 620                            items.push(item);
 621                        }
 622                    }
 623                }
 624                _ => {
 625                    if depth == 0 {
 626                        break;
 627                    }
 628                    // This can only happen if a list item starts with more then one paragraph,
 629                    // or the list item contains blocks that should be rendered after the nested list items
 630                    let block = self.parse_block().await;
 631                    if let Some(block) = block {
 632                        if let Some(list_item) = items_stack.last_mut() {
 633                            // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
 634                            if !insertion_indices.contains_key(&depth) {
 635                                list_item.content.extend(block);
 636                                continue;
 637                            }
 638                        }
 639
 640                        // Otherwise we need to insert the block after all the nested items
 641                        // that have been parsed so far
 642                        items.extend(block);
 643                    } else {
 644                        self.cursor += 1;
 645                    }
 646                }
 647            }
 648        }
 649
 650        items
 651    }
 652
 653    #[async_recursion]
 654    async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
 655        let (_event, source_range) = self.previous().unwrap();
 656        let source_range = source_range.clone();
 657        let mut nested_depth = 1;
 658
 659        let mut children: Vec<ParsedMarkdownElement> = vec![];
 660
 661        while !self.eof() {
 662            let block = self.parse_block().await;
 663
 664            if let Some(block) = block {
 665                children.extend(block);
 666            } else {
 667                break;
 668            }
 669
 670            if self.eof() {
 671                break;
 672            }
 673
 674            let (current, _source_range) = self.current().unwrap();
 675            match current {
 676                // This is a nested block quote.
 677                // Record that we're in a nested block quote and continue parsing.
 678                // We don't need to advance the cursor since the next
 679                // call to `parse_block` will handle it.
 680                Event::Start(Tag::BlockQuote(_kind)) => {
 681                    nested_depth += 1;
 682                }
 683                Event::End(TagEnd::BlockQuote(_kind)) => {
 684                    nested_depth -= 1;
 685                    if nested_depth == 0 {
 686                        self.cursor += 1;
 687                        break;
 688                    }
 689                }
 690                _ => {}
 691            };
 692        }
 693
 694        ParsedMarkdownBlockQuote {
 695            source_range,
 696            children,
 697        }
 698    }
 699
 700    async fn parse_code_block(&mut self, language: Option<String>) -> ParsedMarkdownCodeBlock {
 701        let (_event, source_range) = self.previous().unwrap();
 702        let source_range = source_range.clone();
 703        let mut code = String::new();
 704
 705        while !self.eof() {
 706            let (current, _source_range) = self.current().unwrap();
 707            match current {
 708                Event::Text(text) => {
 709                    code.push_str(text);
 710                    self.cursor += 1;
 711                }
 712                Event::End(TagEnd::CodeBlock) => {
 713                    self.cursor += 1;
 714                    break;
 715                }
 716                _ => {
 717                    break;
 718                }
 719            }
 720        }
 721
 722        code = code.strip_suffix('\n').unwrap_or(&code).to_string();
 723
 724        let highlights = if let Some(language) = &language {
 725            if let Some(registry) = &self.language_registry {
 726                let rope: language::Rope = code.as_str().into();
 727                registry
 728                    .language_for_name_or_extension(language)
 729                    .await
 730                    .map(|l| l.highlight_text(&rope, 0..code.len()))
 731                    .ok()
 732            } else {
 733                None
 734            }
 735        } else {
 736            None
 737        };
 738
 739        ParsedMarkdownCodeBlock {
 740            source_range,
 741            contents: code.into(),
 742            language,
 743            highlights,
 744        }
 745    }
 746}
 747
 748#[cfg(test)]
 749mod tests {
 750    use core::panic;
 751
 752    use super::*;
 753
 754    use ParsedMarkdownListItemType::*;
 755    use gpui::BackgroundExecutor;
 756    use language::{
 757        HighlightId, Language, LanguageConfig, LanguageMatcher, LanguageRegistry, tree_sitter_rust,
 758    };
 759    use pretty_assertions::assert_eq;
 760
 761    async fn parse(input: &str) -> ParsedMarkdown {
 762        parse_markdown(input, None, None).await
 763    }
 764
 765    #[gpui::test]
 766    async fn test_headings() {
 767        let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
 768
 769        assert_eq!(
 770            parsed.children,
 771            vec![
 772                h1(text("Heading one", 2..13), 0..14),
 773                h2(text("Heading two", 17..28), 14..29),
 774                h3(text("Heading three", 33..46), 29..46),
 775            ]
 776        );
 777    }
 778
 779    #[gpui::test]
 780    async fn test_newlines_dont_new_paragraphs() {
 781        let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
 782
 783        assert_eq!(
 784            parsed.children,
 785            vec![p("Some text that is bolded and italicized", 0..46)]
 786        );
 787    }
 788
 789    #[gpui::test]
 790    async fn test_heading_with_paragraph() {
 791        let parsed = parse("# Zed\nThe editor").await;
 792
 793        assert_eq!(
 794            parsed.children,
 795            vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
 796        );
 797    }
 798
 799    #[gpui::test]
 800    async fn test_double_newlines_do_new_paragraphs() {
 801        let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
 802
 803        assert_eq!(
 804            parsed.children,
 805            vec![
 806                p("Some text that is bolded", 0..29),
 807                p("and italicized", 31..47),
 808            ]
 809        );
 810    }
 811
 812    #[gpui::test]
 813    async fn test_bold_italic_text() {
 814        let parsed = parse("Some text **that is bolded** and *italicized*").await;
 815
 816        assert_eq!(
 817            parsed.children,
 818            vec![p("Some text that is bolded and italicized", 0..45)]
 819        );
 820    }
 821
 822    #[gpui::test]
 823    async fn test_nested_bold_strikethrough_text() {
 824        let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
 825
 826        assert_eq!(parsed.children.len(), 1);
 827        assert_eq!(
 828            parsed.children[0],
 829            ParsedMarkdownElement::Paragraph(vec![MarkdownParagraphChunk::Text(
 830                ParsedMarkdownText {
 831                    source_range: 0..35,
 832                    contents: "Some bostrikethroughld text".to_string(),
 833                    highlights: Vec::new(),
 834                    region_ranges: Vec::new(),
 835                    regions: Vec::new(),
 836                }
 837            )])
 838        );
 839
 840        let new_text = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 841            text
 842        } else {
 843            panic!("Expected a paragraph");
 844        };
 845
 846        let paragraph = if let MarkdownParagraphChunk::Text(text) = &new_text[0] {
 847            text
 848        } else {
 849            panic!("Expected a text");
 850        };
 851
 852        assert_eq!(
 853            paragraph.highlights,
 854            vec![
 855                (
 856                    5..7,
 857                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 858                        weight: FontWeight::BOLD,
 859                        ..Default::default()
 860                    }),
 861                ),
 862                (
 863                    7..20,
 864                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 865                        weight: FontWeight::BOLD,
 866                        strikethrough: true,
 867                        ..Default::default()
 868                    }),
 869                ),
 870                (
 871                    20..22,
 872                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 873                        weight: FontWeight::BOLD,
 874                        ..Default::default()
 875                    }),
 876                ),
 877            ]
 878        );
 879    }
 880
 881    #[gpui::test]
 882    async fn test_text_with_inline_html() {
 883        let parsed = parse("This is a paragraph with an inline HTML <sometag>tag</sometag>.").await;
 884
 885        assert_eq!(
 886            parsed.children,
 887            vec![p("This is a paragraph with an inline HTML tag.", 0..63),],
 888        );
 889    }
 890
 891    #[gpui::test]
 892    async fn test_raw_links_detection() {
 893        let parsed = parse("Checkout this https://zed.dev link").await;
 894
 895        assert_eq!(
 896            parsed.children,
 897            vec![p("Checkout this https://zed.dev link", 0..34)]
 898        );
 899    }
 900
 901    #[gpui::test]
 902    async fn test_empty_image() {
 903        let parsed = parse("![]()").await;
 904
 905        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 906            text
 907        } else {
 908            panic!("Expected a paragraph");
 909        };
 910        assert_eq!(paragraph.len(), 0);
 911    }
 912
 913    #[gpui::test]
 914    async fn test_image_links_detection() {
 915        let parsed = parse("![test](https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png)").await;
 916
 917        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 918            text
 919        } else {
 920            panic!("Expected a paragraph");
 921        };
 922        assert_eq!(
 923            paragraph[0],
 924            MarkdownParagraphChunk::Image(Image {
 925                source_range: 0..111,
 926                link: Link::Web {
 927                    url: "https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png".to_string(),
 928                },
 929                alt_text: Some("test".into()),
 930            },)
 931        );
 932    }
 933
 934    #[gpui::test]
 935    async fn test_image_without_alt_text() {
 936        let parsed = parse("![](http://example.com/foo.png)").await;
 937
 938        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 939            text
 940        } else {
 941            panic!("Expected a paragraph");
 942        };
 943        assert_eq!(
 944            paragraph[0],
 945            MarkdownParagraphChunk::Image(Image {
 946                source_range: 0..31,
 947                link: Link::Web {
 948                    url: "http://example.com/foo.png".to_string(),
 949                },
 950                alt_text: None,
 951            },)
 952        );
 953    }
 954
 955    #[gpui::test]
 956    async fn test_image_with_alt_text_containing_formatting() {
 957        let parsed = parse("![foo *bar* baz](http://example.com/foo.png)").await;
 958
 959        let ParsedMarkdownElement::Paragraph(chunks) = &parsed.children[0] else {
 960            panic!("Expected a paragraph");
 961        };
 962        assert_eq!(
 963            chunks,
 964            &[MarkdownParagraphChunk::Image(Image {
 965                source_range: 0..44,
 966                link: Link::Web {
 967                    url: "http://example.com/foo.png".to_string(),
 968                },
 969                alt_text: Some("foo bar baz".into()),
 970            }),],
 971        );
 972    }
 973
 974    #[gpui::test]
 975    async fn test_images_with_text_in_between() {
 976        let parsed = parse(
 977            "![foo](http://example.com/foo.png)\nLorem Ipsum\n![bar](http://example.com/bar.png)",
 978        )
 979        .await;
 980
 981        let chunks = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 982            text
 983        } else {
 984            panic!("Expected a paragraph");
 985        };
 986        assert_eq!(
 987            chunks,
 988            &vec![
 989                MarkdownParagraphChunk::Image(Image {
 990                    source_range: 0..81,
 991                    link: Link::Web {
 992                        url: "http://example.com/foo.png".to_string(),
 993                    },
 994                    alt_text: Some("foo".into()),
 995                }),
 996                MarkdownParagraphChunk::Text(ParsedMarkdownText {
 997                    source_range: 0..81,
 998                    contents: " Lorem Ipsum ".to_string(),
 999                    highlights: Vec::new(),
1000                    region_ranges: Vec::new(),
1001                    regions: Vec::new(),
1002                }),
1003                MarkdownParagraphChunk::Image(Image {
1004                    source_range: 0..81,
1005                    link: Link::Web {
1006                        url: "http://example.com/bar.png".to_string(),
1007                    },
1008                    alt_text: Some("bar".into()),
1009                })
1010            ]
1011        );
1012    }
1013
1014    #[gpui::test]
1015    async fn test_header_only_table() {
1016        let markdown = "\
1017| Header 1 | Header 2 |
1018|----------|----------|
1019
1020Some other content
1021";
1022
1023        let expected_table = table(
1024            0..48,
1025            row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1026            vec![],
1027        );
1028
1029        assert_eq!(
1030            parse(markdown).await.children[0],
1031            ParsedMarkdownElement::Table(expected_table)
1032        );
1033    }
1034
1035    #[gpui::test]
1036    async fn test_basic_table() {
1037        let markdown = "\
1038| Header 1 | Header 2 |
1039|----------|----------|
1040| Cell 1   | Cell 2   |
1041| Cell 3   | Cell 4   |";
1042
1043        let expected_table = table(
1044            0..95,
1045            row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1046            vec![
1047                row(vec![text("Cell 1", 49..59), text("Cell 2", 60..70)]),
1048                row(vec![text("Cell 3", 73..83), text("Cell 4", 84..94)]),
1049            ],
1050        );
1051
1052        assert_eq!(
1053            parse(markdown).await.children[0],
1054            ParsedMarkdownElement::Table(expected_table)
1055        );
1056    }
1057
1058    #[gpui::test]
1059    async fn test_list_basic() {
1060        let parsed = parse(
1061            "\
1062* Item 1
1063* Item 2
1064* Item 3
1065",
1066        )
1067        .await;
1068
1069        assert_eq!(
1070            parsed.children,
1071            vec![
1072                list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1073                list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1074                list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
1075            ],
1076        );
1077    }
1078
1079    #[gpui::test]
1080    async fn test_list_with_tasks() {
1081        let parsed = parse(
1082            "\
1083- [ ] TODO
1084- [x] Checked
1085",
1086        )
1087        .await;
1088
1089        assert_eq!(
1090            parsed.children,
1091            vec![
1092                list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1093                list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
1094            ],
1095        );
1096    }
1097
1098    #[gpui::test]
1099    async fn test_list_with_indented_task() {
1100        let parsed = parse(
1101            "\
1102- [ ] TODO
1103  - [x] Checked
1104  - Unordered
1105  1. Number 1
1106  1. Number 2
11071. Number A
1108",
1109        )
1110        .await;
1111
1112        assert_eq!(
1113            parsed.children,
1114            vec![
1115                list_item(0..12, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1116                list_item(13..26, 2, Task(true, 15..18), vec![p("Checked", 19..26)]),
1117                list_item(29..40, 2, Unordered, vec![p("Unordered", 31..40)]),
1118                list_item(43..54, 2, Ordered(1), vec![p("Number 1", 46..54)]),
1119                list_item(57..68, 2, Ordered(2), vec![p("Number 2", 60..68)]),
1120                list_item(69..80, 1, Ordered(1), vec![p("Number A", 72..80)]),
1121            ],
1122        );
1123    }
1124
1125    #[gpui::test]
1126    async fn test_list_with_linebreak_is_handled_correctly() {
1127        let parsed = parse(
1128            "\
1129- [ ] Task 1
1130
1131- [x] Task 2
1132",
1133        )
1134        .await;
1135
1136        assert_eq!(
1137            parsed.children,
1138            vec![
1139                list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
1140                list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
1141            ],
1142        );
1143    }
1144
1145    #[gpui::test]
1146    async fn test_list_nested() {
1147        let parsed = parse(
1148            "\
1149* Item 1
1150* Item 2
1151* Item 3
1152
11531. Hello
11541. Two
1155   1. Three
11562. Four
11573. Five
1158
1159* First
1160  1. Hello
1161     1. Goodbyte
1162        - Inner
1163        - Inner
1164  2. Goodbyte
1165        - Next item empty
1166        -
1167* Last
1168",
1169        )
1170        .await;
1171
1172        assert_eq!(
1173            parsed.children,
1174            vec![
1175                list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1176                list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1177                list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
1178                list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
1179                list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
1180                list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
1181                list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
1182                list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
1183                list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
1184                list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
1185                list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
1186                list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
1187                list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
1188                list_item(143..159, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
1189                list_item(160..180, 3, Unordered, vec![p("Next item empty", 165..180)]),
1190                list_item(186..190, 3, Unordered, vec![]),
1191                list_item(191..197, 1, Unordered, vec![p("Last", 193..197)]),
1192            ]
1193        );
1194    }
1195
1196    #[gpui::test]
1197    async fn test_list_with_nested_content() {
1198        let parsed = parse(
1199            "\
1200*   This is a list item with two paragraphs.
1201
1202    This is the second paragraph in the list item.
1203",
1204        )
1205        .await;
1206
1207        assert_eq!(
1208            parsed.children,
1209            vec![list_item(
1210                0..96,
1211                1,
1212                Unordered,
1213                vec![
1214                    p("This is a list item with two paragraphs.", 4..44),
1215                    p("This is the second paragraph in the list item.", 50..97)
1216                ],
1217            ),],
1218        );
1219    }
1220
1221    #[gpui::test]
1222    async fn test_list_item_with_inline_html() {
1223        let parsed = parse(
1224            "\
1225*   This is a list item with an inline HTML <sometag>tag</sometag>.
1226",
1227        )
1228        .await;
1229
1230        assert_eq!(
1231            parsed.children,
1232            vec![list_item(
1233                0..67,
1234                1,
1235                Unordered,
1236                vec![p("This is a list item with an inline HTML tag.", 4..44),],
1237            ),],
1238        );
1239    }
1240
1241    #[gpui::test]
1242    async fn test_nested_list_with_paragraph_inside() {
1243        let parsed = parse(
1244            "\
12451. a
1246    1. b
1247        1. c
1248
1249    text
1250
1251    1. d
1252",
1253        )
1254        .await;
1255
1256        assert_eq!(
1257            parsed.children,
1258            vec![
1259                list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
1260                list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
1261                list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
1262                p("text", 32..37),
1263                list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
1264            ],
1265        );
1266    }
1267
1268    #[gpui::test]
1269    async fn test_list_with_leading_text() {
1270        let parsed = parse(
1271            "\
1272* `code`
1273* **bold**
1274* [link](https://example.com)
1275",
1276        )
1277        .await;
1278
1279        assert_eq!(
1280            parsed.children,
1281            vec![
1282                list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
1283                list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
1284                list_item(20..49, 1, Unordered, vec![p("link", 22..49)],),
1285            ],
1286        );
1287    }
1288
1289    #[gpui::test]
1290    async fn test_simple_block_quote() {
1291        let parsed = parse("> Simple block quote with **styled text**").await;
1292
1293        assert_eq!(
1294            parsed.children,
1295            vec![block_quote(
1296                vec![p("Simple block quote with styled text", 2..41)],
1297                0..41
1298            )]
1299        );
1300    }
1301
1302    #[gpui::test]
1303    async fn test_simple_block_quote_with_multiple_lines() {
1304        let parsed = parse(
1305            "\
1306> # Heading
1307> More
1308> text
1309>
1310> More text
1311",
1312        )
1313        .await;
1314
1315        assert_eq!(
1316            parsed.children,
1317            vec![block_quote(
1318                vec![
1319                    h1(text("Heading", 4..11), 2..12),
1320                    p("More text", 14..26),
1321                    p("More text", 30..40)
1322                ],
1323                0..40
1324            )]
1325        );
1326    }
1327
1328    #[gpui::test]
1329    async fn test_nested_block_quote() {
1330        let parsed = parse(
1331            "\
1332> A
1333>
1334> > # B
1335>
1336> C
1337
1338More text
1339",
1340        )
1341        .await;
1342
1343        assert_eq!(
1344            parsed.children,
1345            vec![
1346                block_quote(
1347                    vec![
1348                        p("A", 2..4),
1349                        block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
1350                        p("C", 18..20)
1351                    ],
1352                    0..20
1353                ),
1354                p("More text", 21..31)
1355            ]
1356        );
1357    }
1358
1359    #[gpui::test]
1360    async fn test_code_block() {
1361        let parsed = parse(
1362            "\
1363```
1364fn main() {
1365    return 0;
1366}
1367```
1368",
1369        )
1370        .await;
1371
1372        assert_eq!(
1373            parsed.children,
1374            vec![code_block(
1375                None,
1376                "fn main() {\n    return 0;\n}",
1377                0..35,
1378                None
1379            )]
1380        );
1381    }
1382
1383    #[gpui::test]
1384    async fn test_code_block_with_language(executor: BackgroundExecutor) {
1385        let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
1386        language_registry.add(rust_lang());
1387
1388        let parsed = parse_markdown(
1389            "\
1390```rust
1391fn main() {
1392    return 0;
1393}
1394```
1395",
1396            None,
1397            Some(language_registry),
1398        )
1399        .await;
1400
1401        assert_eq!(
1402            parsed.children,
1403            vec![code_block(
1404                Some("rust".to_string()),
1405                "fn main() {\n    return 0;\n}",
1406                0..39,
1407                Some(vec![])
1408            )]
1409        );
1410    }
1411
1412    fn rust_lang() -> Arc<Language> {
1413        Arc::new(Language::new(
1414            LanguageConfig {
1415                name: "Rust".into(),
1416                matcher: LanguageMatcher {
1417                    path_suffixes: vec!["rs".into()],
1418                    ..Default::default()
1419                },
1420                collapsed_placeholder: " /* ... */ ".to_string(),
1421                ..Default::default()
1422            },
1423            Some(tree_sitter_rust::LANGUAGE.into()),
1424        ))
1425    }
1426
1427    fn h1(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1428        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1429            source_range,
1430            level: HeadingLevel::H1,
1431            contents,
1432        })
1433    }
1434
1435    fn h2(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1436        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1437            source_range,
1438            level: HeadingLevel::H2,
1439            contents,
1440        })
1441    }
1442
1443    fn h3(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1444        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1445            source_range,
1446            level: HeadingLevel::H3,
1447            contents,
1448        })
1449    }
1450
1451    fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
1452        ParsedMarkdownElement::Paragraph(text(contents, source_range))
1453    }
1454
1455    fn text(contents: &str, source_range: Range<usize>) -> MarkdownParagraph {
1456        vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1457            highlights: Vec::new(),
1458            region_ranges: Vec::new(),
1459            regions: Vec::new(),
1460            source_range,
1461            contents: contents.to_string(),
1462        })]
1463    }
1464
1465    fn block_quote(
1466        children: Vec<ParsedMarkdownElement>,
1467        source_range: Range<usize>,
1468    ) -> ParsedMarkdownElement {
1469        ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
1470            source_range,
1471            children,
1472        })
1473    }
1474
1475    fn code_block(
1476        language: Option<String>,
1477        code: &str,
1478        source_range: Range<usize>,
1479        highlights: Option<Vec<(Range<usize>, HighlightId)>>,
1480    ) -> ParsedMarkdownElement {
1481        ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
1482            source_range,
1483            language,
1484            contents: code.to_string().into(),
1485            highlights,
1486        })
1487    }
1488
1489    fn list_item(
1490        source_range: Range<usize>,
1491        depth: u16,
1492        item_type: ParsedMarkdownListItemType,
1493        content: Vec<ParsedMarkdownElement>,
1494    ) -> ParsedMarkdownElement {
1495        ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
1496            source_range,
1497            item_type,
1498            depth,
1499            content,
1500        })
1501    }
1502
1503    fn table(
1504        source_range: Range<usize>,
1505        header: ParsedMarkdownTableRow,
1506        body: Vec<ParsedMarkdownTableRow>,
1507    ) -> ParsedMarkdownTable {
1508        ParsedMarkdownTable {
1509            column_alignments: Vec::new(),
1510            source_range,
1511            header,
1512            body,
1513        }
1514    }
1515
1516    fn row(children: Vec<MarkdownParagraph>) -> ParsedMarkdownTableRow {
1517        ParsedMarkdownTableRow { children }
1518    }
1519
1520    impl PartialEq for ParsedMarkdownTable {
1521        fn eq(&self, other: &Self) -> bool {
1522            self.source_range == other.source_range
1523                && self.header == other.header
1524                && self.body == other.body
1525        }
1526    }
1527
1528    impl PartialEq for ParsedMarkdownText {
1529        fn eq(&self, other: &Self) -> bool {
1530            self.source_range == other.source_range && self.contents == other.contents
1531        }
1532    }
1533}