markdown_parser.rs

   1use crate::markdown_elements::*;
   2use async_recursion::async_recursion;
   3use collections::FxHashMap;
   4use gpui::FontWeight;
   5use language::LanguageRegistry;
   6use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
   7use std::{ops::Range, path::PathBuf, sync::Arc, vec};
   8
   9pub async fn parse_markdown(
  10    markdown_input: &str,
  11    file_location_directory: Option<PathBuf>,
  12    language_registry: Option<Arc<LanguageRegistry>>,
  13) -> ParsedMarkdown {
  14    let mut options = Options::all();
  15    options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
  16
  17    let parser = Parser::new_ext(markdown_input, options);
  18    let parser = MarkdownParser::new(
  19        parser.into_offset_iter().collect(),
  20        file_location_directory,
  21        language_registry,
  22    );
  23    let renderer = parser.parse_document().await;
  24    ParsedMarkdown {
  25        children: renderer.parsed,
  26    }
  27}
  28
  29struct MarkdownParser<'a> {
  30    tokens: Vec<(Event<'a>, Range<usize>)>,
  31    /// The current index in the tokens array
  32    cursor: usize,
  33    /// The blocks that we have successfully parsed so far
  34    parsed: Vec<ParsedMarkdownElement>,
  35    file_location_directory: Option<PathBuf>,
  36    language_registry: Option<Arc<LanguageRegistry>>,
  37}
  38
  39struct MarkdownListItem {
  40    content: Vec<ParsedMarkdownElement>,
  41    item_type: ParsedMarkdownListItemType,
  42}
  43
  44impl Default for MarkdownListItem {
  45    fn default() -> Self {
  46        Self {
  47            content: Vec::new(),
  48            item_type: ParsedMarkdownListItemType::Unordered,
  49        }
  50    }
  51}
  52
  53impl<'a> MarkdownParser<'a> {
  54    fn new(
  55        tokens: Vec<(Event<'a>, Range<usize>)>,
  56        file_location_directory: Option<PathBuf>,
  57        language_registry: Option<Arc<LanguageRegistry>>,
  58    ) -> Self {
  59        Self {
  60            tokens,
  61            file_location_directory,
  62            language_registry,
  63            cursor: 0,
  64            parsed: vec![],
  65        }
  66    }
  67
  68    fn eof(&self) -> bool {
  69        if self.tokens.is_empty() {
  70            return true;
  71        }
  72        self.cursor >= self.tokens.len() - 1
  73    }
  74
  75    fn peek(&self, steps: usize) -> Option<&(Event, Range<usize>)> {
  76        if self.eof() || (steps + self.cursor) >= self.tokens.len() {
  77            return self.tokens.last();
  78        }
  79        return self.tokens.get(self.cursor + steps);
  80    }
  81
  82    fn previous(&self) -> Option<&(Event, Range<usize>)> {
  83        if self.cursor == 0 || self.cursor > self.tokens.len() {
  84            return None;
  85        }
  86        return self.tokens.get(self.cursor - 1);
  87    }
  88
  89    fn current(&self) -> Option<&(Event, Range<usize>)> {
  90        return self.peek(0);
  91    }
  92
  93    fn current_event(&self) -> Option<&Event> {
  94        return self.current().map(|(event, _)| event);
  95    }
  96
  97    fn is_text_like(event: &Event) -> bool {
  98        match event {
  99            Event::Text(_)
 100            // Represent an inline code block
 101            | Event::Code(_)
 102            | Event::Html(_)
 103            | Event::InlineHtml(_)
 104            | Event::FootnoteReference(_)
 105            | Event::Start(Tag::Link { .. })
 106            | Event::Start(Tag::Emphasis)
 107            | Event::Start(Tag::Strong)
 108            | Event::Start(Tag::Strikethrough)
 109            | Event::Start(Tag::Image { .. }) => {
 110                true
 111            }
 112            _ => false,
 113        }
 114    }
 115
 116    async fn parse_document(mut self) -> Self {
 117        while !self.eof() {
 118            if let Some(block) = self.parse_block().await {
 119                self.parsed.extend(block);
 120            } else {
 121                self.cursor += 1;
 122            }
 123        }
 124        self
 125    }
 126
 127    #[async_recursion]
 128    async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
 129        let (current, source_range) = self.current().unwrap();
 130        let source_range = source_range.clone();
 131        match current {
 132            Event::Start(tag) => match tag {
 133                Tag::Paragraph => {
 134                    self.cursor += 1;
 135                    let text = self.parse_text(false, Some(source_range));
 136                    Some(vec![ParsedMarkdownElement::Paragraph(text)])
 137                }
 138                Tag::Heading { level, .. } => {
 139                    let level = *level;
 140                    self.cursor += 1;
 141                    let heading = self.parse_heading(level);
 142                    Some(vec![ParsedMarkdownElement::Heading(heading)])
 143                }
 144                Tag::Table(alignment) => {
 145                    let alignment = alignment.clone();
 146                    self.cursor += 1;
 147                    let table = self.parse_table(alignment);
 148                    Some(vec![ParsedMarkdownElement::Table(table)])
 149                }
 150                Tag::List(order) => {
 151                    let order = *order;
 152                    self.cursor += 1;
 153                    let list = self.parse_list(order).await;
 154                    Some(list)
 155                }
 156                Tag::BlockQuote(_kind) => {
 157                    self.cursor += 1;
 158                    let block_quote = self.parse_block_quote().await;
 159                    Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
 160                }
 161                Tag::CodeBlock(kind) => {
 162                    let language = match kind {
 163                        pulldown_cmark::CodeBlockKind::Indented => None,
 164                        pulldown_cmark::CodeBlockKind::Fenced(language) => {
 165                            if language.is_empty() {
 166                                None
 167                            } else {
 168                                Some(language.to_string())
 169                            }
 170                        }
 171                    };
 172
 173                    self.cursor += 1;
 174
 175                    let code_block = self.parse_code_block(language).await;
 176                    Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
 177                }
 178                _ => None,
 179            },
 180            Event::Rule => {
 181                let source_range = source_range.clone();
 182                self.cursor += 1;
 183                Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
 184            }
 185            _ => None,
 186        }
 187    }
 188
 189    fn parse_text(
 190        &mut self,
 191        should_complete_on_soft_break: bool,
 192        source_range: Option<Range<usize>>,
 193    ) -> MarkdownParagraph {
 194        let source_range = source_range.unwrap_or_else(|| {
 195            self.current()
 196                .map(|(_, range)| range.clone())
 197                .unwrap_or_default()
 198        });
 199
 200        let mut markdown_text_like = Vec::new();
 201        let mut text = String::new();
 202        let mut bold_depth = 0;
 203        let mut italic_depth = 0;
 204        let mut strikethrough_depth = 0;
 205        let mut link: Option<Link> = None;
 206        let mut image: Option<Image> = None;
 207        let mut region_ranges: Vec<Range<usize>> = vec![];
 208        let mut regions: Vec<ParsedRegion> = vec![];
 209        let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
 210        let mut link_urls: Vec<String> = vec![];
 211        let mut link_ranges: Vec<Range<usize>> = vec![];
 212
 213        loop {
 214            if self.eof() {
 215                break;
 216            }
 217
 218            let (current, _) = self.current().unwrap();
 219            let prev_len = text.len();
 220            match current {
 221                Event::SoftBreak => {
 222                    if should_complete_on_soft_break {
 223                        break;
 224                    }
 225                    text.push(' ');
 226                }
 227
 228                Event::HardBreak => {
 229                    text.push('\n');
 230                }
 231
 232                // We want to ignore any inline HTML tags in the text but keep
 233                // the text between them
 234                Event::InlineHtml(_) => {}
 235
 236                Event::Text(t) => {
 237                    text.push_str(t.as_ref());
 238                    let mut style = MarkdownHighlightStyle::default();
 239
 240                    if bold_depth > 0 {
 241                        style.weight = FontWeight::BOLD;
 242                    }
 243
 244                    if italic_depth > 0 {
 245                        style.italic = true;
 246                    }
 247
 248                    if strikethrough_depth > 0 {
 249                        style.strikethrough = true;
 250                    }
 251
 252                    let last_run_len = if let Some(link) = link.clone() {
 253                        region_ranges.push(prev_len..text.len());
 254                        regions.push(ParsedRegion {
 255                            code: false,
 256                            link: Some(link),
 257                        });
 258                        style.underline = true;
 259                        prev_len
 260                    } else {
 261                        // Manually scan for links
 262                        let mut finder = linkify::LinkFinder::new();
 263                        finder.kinds(&[linkify::LinkKind::Url]);
 264                        let mut last_link_len = prev_len;
 265                        for link in finder.links(t) {
 266                            let start = link.start();
 267                            let end = link.end();
 268                            let range = (prev_len + start)..(prev_len + end);
 269                            link_ranges.push(range.clone());
 270                            link_urls.push(link.as_str().to_string());
 271
 272                            // If there is a style before we match a link, we have to add this to the highlighted ranges
 273                            if style != MarkdownHighlightStyle::default()
 274                                && last_link_len < link.start()
 275                            {
 276                                highlights.push((
 277                                    last_link_len..link.start(),
 278                                    MarkdownHighlight::Style(style.clone()),
 279                                ));
 280                            }
 281
 282                            highlights.push((
 283                                range.clone(),
 284                                MarkdownHighlight::Style(MarkdownHighlightStyle {
 285                                    underline: true,
 286                                    ..style
 287                                }),
 288                            ));
 289                            region_ranges.push(range.clone());
 290                            regions.push(ParsedRegion {
 291                                code: false,
 292                                link: Some(Link::Web {
 293                                    url: link.as_str().to_string(),
 294                                }),
 295                            });
 296                            last_link_len = end;
 297                        }
 298                        last_link_len
 299                    };
 300
 301                    if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
 302                        let mut new_highlight = true;
 303                        if let Some((last_range, last_style)) = highlights.last_mut() {
 304                            if last_range.end == last_run_len
 305                                && last_style == &MarkdownHighlight::Style(style.clone())
 306                            {
 307                                last_range.end = text.len();
 308                                new_highlight = false;
 309                            }
 310                        }
 311                        if new_highlight {
 312                            highlights.push((
 313                                last_run_len..text.len(),
 314                                MarkdownHighlight::Style(style.clone()),
 315                            ));
 316                        }
 317                    }
 318                }
 319                Event::Code(t) => {
 320                    text.push_str(t.as_ref());
 321                    region_ranges.push(prev_len..text.len());
 322
 323                    if link.is_some() {
 324                        highlights.push((
 325                            prev_len..text.len(),
 326                            MarkdownHighlight::Style(MarkdownHighlightStyle {
 327                                underline: true,
 328                                ..Default::default()
 329                            }),
 330                        ));
 331                    }
 332                    regions.push(ParsedRegion {
 333                        code: true,
 334                        link: link.clone(),
 335                    });
 336                }
 337                Event::Start(tag) => match tag {
 338                    Tag::Emphasis => italic_depth += 1,
 339                    Tag::Strong => bold_depth += 1,
 340                    Tag::Strikethrough => strikethrough_depth += 1,
 341                    Tag::Link { dest_url, .. } => {
 342                        link = Link::identify(
 343                            self.file_location_directory.clone(),
 344                            dest_url.to_string(),
 345                        );
 346                    }
 347                    Tag::Image { dest_url, .. } => {
 348                        if !text.is_empty() {
 349                            let parsed_regions = MarkdownParagraphChunk::Text(ParsedMarkdownText {
 350                                source_range: source_range.clone(),
 351                                contents: text.clone(),
 352                                highlights: highlights.clone(),
 353                                region_ranges: region_ranges.clone(),
 354                                regions: regions.clone(),
 355                            });
 356                            text = String::new();
 357                            highlights = vec![];
 358                            region_ranges = vec![];
 359                            regions = vec![];
 360                            markdown_text_like.push(parsed_regions);
 361                        }
 362                        image = Image::identify(
 363                            dest_url.to_string(),
 364                            source_range.clone(),
 365                            self.file_location_directory.clone(),
 366                        );
 367                    }
 368                    _ => {
 369                        break;
 370                    }
 371                },
 372
 373                Event::End(tag) => match tag {
 374                    TagEnd::Emphasis => italic_depth -= 1,
 375                    TagEnd::Strong => bold_depth -= 1,
 376                    TagEnd::Strikethrough => strikethrough_depth -= 1,
 377                    TagEnd::Link => {
 378                        link = None;
 379                    }
 380                    TagEnd::Image => {
 381                        if let Some(mut image) = image.take() {
 382                            if !text.is_empty() {
 383                                image.alt_text = Some(std::mem::take(&mut text).into());
 384                            }
 385                            markdown_text_like.push(MarkdownParagraphChunk::Image(image));
 386                        }
 387                    }
 388                    TagEnd::Paragraph => {
 389                        self.cursor += 1;
 390                        break;
 391                    }
 392                    _ => {
 393                        break;
 394                    }
 395                },
 396                _ => {
 397                    break;
 398                }
 399            }
 400
 401            self.cursor += 1;
 402        }
 403        if !text.is_empty() {
 404            markdown_text_like.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
 405                source_range: source_range.clone(),
 406                contents: text,
 407                highlights,
 408                regions,
 409                region_ranges,
 410            }));
 411        }
 412        markdown_text_like
 413    }
 414
 415    fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
 416        let (_event, source_range) = self.previous().unwrap();
 417        let source_range = source_range.clone();
 418        let text = self.parse_text(true, None);
 419
 420        // Advance past the heading end tag
 421        self.cursor += 1;
 422
 423        ParsedMarkdownHeading {
 424            source_range: source_range.clone(),
 425            level: match level {
 426                pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
 427                pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
 428                pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
 429                pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
 430                pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
 431                pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
 432            },
 433            contents: text,
 434        }
 435    }
 436
 437    fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
 438        let (_event, source_range) = self.previous().unwrap();
 439        let source_range = source_range.clone();
 440        let mut header = ParsedMarkdownTableRow::new();
 441        let mut body = vec![];
 442        let mut current_row = vec![];
 443        let mut in_header = true;
 444        let column_alignments = alignment.iter().map(Self::convert_alignment).collect();
 445
 446        loop {
 447            if self.eof() {
 448                break;
 449            }
 450
 451            let (current, source_range) = self.current().unwrap();
 452            let source_range = source_range.clone();
 453            match current {
 454                Event::Start(Tag::TableHead)
 455                | Event::Start(Tag::TableRow)
 456                | Event::End(TagEnd::TableCell) => {
 457                    self.cursor += 1;
 458                }
 459                Event::Start(Tag::TableCell) => {
 460                    self.cursor += 1;
 461                    let cell_contents = self.parse_text(false, Some(source_range));
 462                    current_row.push(cell_contents);
 463                }
 464                Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
 465                    self.cursor += 1;
 466                    let new_row = std::mem::take(&mut current_row);
 467                    if in_header {
 468                        header.children = new_row;
 469                        in_header = false;
 470                    } else {
 471                        let row = ParsedMarkdownTableRow::with_children(new_row);
 472                        body.push(row);
 473                    }
 474                }
 475                Event::End(TagEnd::Table) => {
 476                    self.cursor += 1;
 477                    break;
 478                }
 479                _ => {
 480                    break;
 481                }
 482            }
 483        }
 484
 485        ParsedMarkdownTable {
 486            source_range,
 487            header,
 488            body,
 489            column_alignments,
 490        }
 491    }
 492
 493    fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
 494        match alignment {
 495            Alignment::None => ParsedMarkdownTableAlignment::None,
 496            Alignment::Left => ParsedMarkdownTableAlignment::Left,
 497            Alignment::Center => ParsedMarkdownTableAlignment::Center,
 498            Alignment::Right => ParsedMarkdownTableAlignment::Right,
 499        }
 500    }
 501
 502    async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
 503        let (_, list_source_range) = self.previous().unwrap();
 504
 505        let mut items = Vec::new();
 506        let mut items_stack = vec![MarkdownListItem::default()];
 507        let mut depth = 1;
 508        let mut order = order;
 509        let mut order_stack = Vec::new();
 510
 511        let mut insertion_indices = FxHashMap::default();
 512        let mut source_ranges = FxHashMap::default();
 513        let mut start_item_range = list_source_range.clone();
 514
 515        while !self.eof() {
 516            let (current, source_range) = self.current().unwrap();
 517            match current {
 518                Event::Start(Tag::List(new_order)) => {
 519                    if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
 520                        insertion_indices.insert(depth, items.len());
 521                    }
 522
 523                    // We will use the start of the nested list as the end for the current item's range,
 524                    // because we don't care about the hierarchy of list items
 525                    if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
 526                        e.insert(start_item_range.start..source_range.start);
 527                    }
 528
 529                    order_stack.push(order);
 530                    order = *new_order;
 531                    self.cursor += 1;
 532                    depth += 1;
 533                }
 534                Event::End(TagEnd::List(_)) => {
 535                    order = order_stack.pop().flatten();
 536                    self.cursor += 1;
 537                    depth -= 1;
 538
 539                    if depth == 0 {
 540                        break;
 541                    }
 542                }
 543                Event::Start(Tag::Item) => {
 544                    start_item_range = source_range.clone();
 545
 546                    self.cursor += 1;
 547                    items_stack.push(MarkdownListItem::default());
 548
 549                    let mut task_list = None;
 550                    // Check for task list marker (`- [ ]` or `- [x]`)
 551                    if let Some(event) = self.current_event() {
 552                        // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
 553                        if event == &Event::Start(Tag::Paragraph) {
 554                            self.cursor += 1;
 555                        }
 556
 557                        if let Some((Event::TaskListMarker(checked), range)) = self.current() {
 558                            task_list = Some((*checked, range.clone()));
 559                            self.cursor += 1;
 560                        }
 561                    }
 562
 563                    if let Some((event, range)) = self.current() {
 564                        // This is a plain list item.
 565                        // For example `- some text` or `1. [Docs](./docs.md)`
 566                        if MarkdownParser::is_text_like(event) {
 567                            let text = self.parse_text(false, Some(range.clone()));
 568                            let block = ParsedMarkdownElement::Paragraph(text);
 569                            if let Some(content) = items_stack.last_mut() {
 570                                let item_type = if let Some((checked, range)) = task_list {
 571                                    ParsedMarkdownListItemType::Task(checked, range)
 572                                } else if let Some(order) = order {
 573                                    ParsedMarkdownListItemType::Ordered(order)
 574                                } else {
 575                                    ParsedMarkdownListItemType::Unordered
 576                                };
 577                                content.item_type = item_type;
 578                                content.content.push(block);
 579                            }
 580                        } else {
 581                            let block = self.parse_block().await;
 582                            if let Some(block) = block {
 583                                if let Some(list_item) = items_stack.last_mut() {
 584                                    list_item.content.extend(block);
 585                                }
 586                            }
 587                        }
 588                    }
 589
 590                    // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
 591                    if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
 592                        self.cursor += 1;
 593                    }
 594                }
 595                Event::End(TagEnd::Item) => {
 596                    self.cursor += 1;
 597
 598                    if let Some(current) = order {
 599                        order = Some(current + 1);
 600                    }
 601
 602                    if let Some(list_item) = items_stack.pop() {
 603                        let source_range = source_ranges
 604                            .remove(&depth)
 605                            .unwrap_or(start_item_range.clone());
 606
 607                        // We need to remove the last character of the source range, because it includes the newline character
 608                        let source_range = source_range.start..source_range.end - 1;
 609                        let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
 610                            source_range,
 611                            content: list_item.content,
 612                            depth,
 613                            item_type: list_item.item_type,
 614                        });
 615
 616                        if let Some(index) = insertion_indices.get(&depth) {
 617                            items.insert(*index, item);
 618                            insertion_indices.remove(&depth);
 619                        } else {
 620                            items.push(item);
 621                        }
 622                    }
 623                }
 624                _ => {
 625                    if depth == 0 {
 626                        break;
 627                    }
 628                    // This can only happen if a list item starts with more then one paragraph,
 629                    // or the list item contains blocks that should be rendered after the nested list items
 630                    let block = self.parse_block().await;
 631                    if let Some(block) = block {
 632                        if let Some(list_item) = items_stack.last_mut() {
 633                            // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
 634                            if !insertion_indices.contains_key(&depth) {
 635                                list_item.content.extend(block);
 636                                continue;
 637                            }
 638                        }
 639
 640                        // Otherwise we need to insert the block after all the nested items
 641                        // that have been parsed so far
 642                        items.extend(block);
 643                    } else {
 644                        self.cursor += 1;
 645                    }
 646                }
 647            }
 648        }
 649
 650        items
 651    }
 652
 653    #[async_recursion]
 654    async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
 655        let (_event, source_range) = self.previous().unwrap();
 656        let source_range = source_range.clone();
 657        let mut nested_depth = 1;
 658
 659        let mut children: Vec<ParsedMarkdownElement> = vec![];
 660
 661        while !self.eof() {
 662            let block = self.parse_block().await;
 663
 664            if let Some(block) = block {
 665                children.extend(block);
 666            } else {
 667                break;
 668            }
 669
 670            if self.eof() {
 671                break;
 672            }
 673
 674            let (current, _source_range) = self.current().unwrap();
 675            match current {
 676                // This is a nested block quote.
 677                // Record that we're in a nested block quote and continue parsing.
 678                // We don't need to advance the cursor since the next
 679                // call to `parse_block` will handle it.
 680                Event::Start(Tag::BlockQuote(_kind)) => {
 681                    nested_depth += 1;
 682                }
 683                Event::End(TagEnd::BlockQuote(_kind)) => {
 684                    nested_depth -= 1;
 685                    if nested_depth == 0 {
 686                        self.cursor += 1;
 687                        break;
 688                    }
 689                }
 690                _ => {}
 691            };
 692        }
 693
 694        ParsedMarkdownBlockQuote {
 695            source_range,
 696            children,
 697        }
 698    }
 699
 700    async fn parse_code_block(&mut self, language: Option<String>) -> ParsedMarkdownCodeBlock {
 701        let (_event, source_range) = self.previous().unwrap();
 702        let source_range = source_range.clone();
 703        let mut code = String::new();
 704
 705        while !self.eof() {
 706            let (current, _source_range) = self.current().unwrap();
 707            match current {
 708                Event::Text(text) => {
 709                    code.push_str(text);
 710                    self.cursor += 1;
 711                }
 712                Event::End(TagEnd::CodeBlock) => {
 713                    self.cursor += 1;
 714                    break;
 715                }
 716                _ => {
 717                    break;
 718                }
 719            }
 720        }
 721        let highlights = if let Some(language) = &language {
 722            if let Some(registry) = &self.language_registry {
 723                let rope: language::Rope = code.as_str().into();
 724                registry
 725                    .language_for_name_or_extension(language)
 726                    .await
 727                    .map(|l| l.highlight_text(&rope, 0..code.len()))
 728                    .ok()
 729            } else {
 730                None
 731            }
 732        } else {
 733            None
 734        };
 735
 736        ParsedMarkdownCodeBlock {
 737            source_range,
 738            contents: code.trim().to_string().into(),
 739            language,
 740            highlights,
 741        }
 742    }
 743}
 744
 745#[cfg(test)]
 746mod tests {
 747    use core::panic;
 748
 749    use super::*;
 750
 751    use gpui::BackgroundExecutor;
 752    use language::{
 753        tree_sitter_rust, HighlightId, Language, LanguageConfig, LanguageMatcher, LanguageRegistry,
 754    };
 755    use pretty_assertions::assert_eq;
 756    use ParsedMarkdownListItemType::*;
 757
 758    async fn parse(input: &str) -> ParsedMarkdown {
 759        parse_markdown(input, None, None).await
 760    }
 761
 762    #[gpui::test]
 763    async fn test_headings() {
 764        let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
 765
 766        assert_eq!(
 767            parsed.children,
 768            vec![
 769                h1(text("Heading one", 2..13), 0..14),
 770                h2(text("Heading two", 17..28), 14..29),
 771                h3(text("Heading three", 33..46), 29..46),
 772            ]
 773        );
 774    }
 775
 776    #[gpui::test]
 777    async fn test_newlines_dont_new_paragraphs() {
 778        let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
 779
 780        assert_eq!(
 781            parsed.children,
 782            vec![p("Some text that is bolded and italicized", 0..46)]
 783        );
 784    }
 785
 786    #[gpui::test]
 787    async fn test_heading_with_paragraph() {
 788        let parsed = parse("# Zed\nThe editor").await;
 789
 790        assert_eq!(
 791            parsed.children,
 792            vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
 793        );
 794    }
 795
 796    #[gpui::test]
 797    async fn test_double_newlines_do_new_paragraphs() {
 798        let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
 799
 800        assert_eq!(
 801            parsed.children,
 802            vec![
 803                p("Some text that is bolded", 0..29),
 804                p("and italicized", 31..47),
 805            ]
 806        );
 807    }
 808
 809    #[gpui::test]
 810    async fn test_bold_italic_text() {
 811        let parsed = parse("Some text **that is bolded** and *italicized*").await;
 812
 813        assert_eq!(
 814            parsed.children,
 815            vec![p("Some text that is bolded and italicized", 0..45)]
 816        );
 817    }
 818
 819    #[gpui::test]
 820    async fn test_nested_bold_strikethrough_text() {
 821        let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
 822
 823        assert_eq!(parsed.children.len(), 1);
 824        assert_eq!(
 825            parsed.children[0],
 826            ParsedMarkdownElement::Paragraph(vec![MarkdownParagraphChunk::Text(
 827                ParsedMarkdownText {
 828                    source_range: 0..35,
 829                    contents: "Some bostrikethroughld text".to_string(),
 830                    highlights: Vec::new(),
 831                    region_ranges: Vec::new(),
 832                    regions: Vec::new(),
 833                }
 834            )])
 835        );
 836
 837        let new_text = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 838            text
 839        } else {
 840            panic!("Expected a paragraph");
 841        };
 842
 843        let paragraph = if let MarkdownParagraphChunk::Text(text) = &new_text[0] {
 844            text
 845        } else {
 846            panic!("Expected a text");
 847        };
 848
 849        assert_eq!(
 850            paragraph.highlights,
 851            vec![
 852                (
 853                    5..7,
 854                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 855                        weight: FontWeight::BOLD,
 856                        ..Default::default()
 857                    }),
 858                ),
 859                (
 860                    7..20,
 861                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 862                        weight: FontWeight::BOLD,
 863                        strikethrough: true,
 864                        ..Default::default()
 865                    }),
 866                ),
 867                (
 868                    20..22,
 869                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 870                        weight: FontWeight::BOLD,
 871                        ..Default::default()
 872                    }),
 873                ),
 874            ]
 875        );
 876    }
 877
 878    #[gpui::test]
 879    async fn test_text_with_inline_html() {
 880        let parsed = parse("This is a paragraph with an inline HTML <sometag>tag</sometag>.").await;
 881
 882        assert_eq!(
 883            parsed.children,
 884            vec![p("This is a paragraph with an inline HTML tag.", 0..63),],
 885        );
 886    }
 887
 888    #[gpui::test]
 889    async fn test_raw_links_detection() {
 890        let parsed = parse("Checkout this https://zed.dev link").await;
 891
 892        assert_eq!(
 893            parsed.children,
 894            vec![p("Checkout this https://zed.dev link", 0..34)]
 895        );
 896    }
 897
 898    #[gpui::test]
 899    async fn test_empty_image() {
 900        let parsed = parse("![]()").await;
 901
 902        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 903            text
 904        } else {
 905            panic!("Expected a paragraph");
 906        };
 907        assert_eq!(paragraph.len(), 0);
 908    }
 909
 910    #[gpui::test]
 911    async fn test_image_links_detection() {
 912        let parsed = parse("![test](https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png)").await;
 913
 914        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 915            text
 916        } else {
 917            panic!("Expected a paragraph");
 918        };
 919        assert_eq!(
 920            paragraph[0],
 921            MarkdownParagraphChunk::Image(Image {
 922                source_range: 0..111,
 923                link: Link::Web {
 924                    url: "https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png".to_string(),
 925                },
 926                alt_text: Some("test".into()),
 927            },)
 928        );
 929    }
 930
 931    #[gpui::test]
 932    async fn test_image_without_alt_text() {
 933        let parsed = parse("![](http://example.com/foo.png)").await;
 934
 935        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 936            text
 937        } else {
 938            panic!("Expected a paragraph");
 939        };
 940        assert_eq!(
 941            paragraph[0],
 942            MarkdownParagraphChunk::Image(Image {
 943                source_range: 0..31,
 944                link: Link::Web {
 945                    url: "http://example.com/foo.png".to_string(),
 946                },
 947                alt_text: None,
 948            },)
 949        );
 950    }
 951
 952    #[gpui::test]
 953    async fn test_image_with_alt_text_containing_formatting() {
 954        let parsed = parse("![foo *bar* baz](http://example.com/foo.png)").await;
 955
 956        let ParsedMarkdownElement::Paragraph(chunks) = &parsed.children[0] else {
 957            panic!("Expected a paragraph");
 958        };
 959        assert_eq!(
 960            chunks,
 961            &[MarkdownParagraphChunk::Image(Image {
 962                source_range: 0..44,
 963                link: Link::Web {
 964                    url: "http://example.com/foo.png".to_string(),
 965                },
 966                alt_text: Some("foo bar baz".into()),
 967            }),],
 968        );
 969    }
 970
 971    #[gpui::test]
 972    async fn test_images_with_text_in_between() {
 973        let parsed = parse(
 974            "![foo](http://example.com/foo.png)\nLorem Ipsum\n![bar](http://example.com/bar.png)",
 975        )
 976        .await;
 977
 978        let chunks = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 979            text
 980        } else {
 981            panic!("Expected a paragraph");
 982        };
 983        assert_eq!(
 984            chunks,
 985            &vec![
 986                MarkdownParagraphChunk::Image(Image {
 987                    source_range: 0..81,
 988                    link: Link::Web {
 989                        url: "http://example.com/foo.png".to_string(),
 990                    },
 991                    alt_text: Some("foo".into()),
 992                }),
 993                MarkdownParagraphChunk::Text(ParsedMarkdownText {
 994                    source_range: 0..81,
 995                    contents: " Lorem Ipsum ".to_string(),
 996                    highlights: Vec::new(),
 997                    region_ranges: Vec::new(),
 998                    regions: Vec::new(),
 999                }),
1000                MarkdownParagraphChunk::Image(Image {
1001                    source_range: 0..81,
1002                    link: Link::Web {
1003                        url: "http://example.com/bar.png".to_string(),
1004                    },
1005                    alt_text: Some("bar".into()),
1006                })
1007            ]
1008        );
1009    }
1010
1011    #[gpui::test]
1012    async fn test_header_only_table() {
1013        let markdown = "\
1014| Header 1 | Header 2 |
1015|----------|----------|
1016
1017Some other content
1018";
1019
1020        let expected_table = table(
1021            0..48,
1022            row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1023            vec![],
1024        );
1025
1026        assert_eq!(
1027            parse(markdown).await.children[0],
1028            ParsedMarkdownElement::Table(expected_table)
1029        );
1030    }
1031
1032    #[gpui::test]
1033    async fn test_basic_table() {
1034        let markdown = "\
1035| Header 1 | Header 2 |
1036|----------|----------|
1037| Cell 1   | Cell 2   |
1038| Cell 3   | Cell 4   |";
1039
1040        let expected_table = table(
1041            0..95,
1042            row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1043            vec![
1044                row(vec![text("Cell 1", 49..59), text("Cell 2", 60..70)]),
1045                row(vec![text("Cell 3", 73..83), text("Cell 4", 84..94)]),
1046            ],
1047        );
1048
1049        assert_eq!(
1050            parse(markdown).await.children[0],
1051            ParsedMarkdownElement::Table(expected_table)
1052        );
1053    }
1054
1055    #[gpui::test]
1056    async fn test_list_basic() {
1057        let parsed = parse(
1058            "\
1059* Item 1
1060* Item 2
1061* Item 3
1062",
1063        )
1064        .await;
1065
1066        assert_eq!(
1067            parsed.children,
1068            vec![
1069                list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1070                list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1071                list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
1072            ],
1073        );
1074    }
1075
1076    #[gpui::test]
1077    async fn test_list_with_tasks() {
1078        let parsed = parse(
1079            "\
1080- [ ] TODO
1081- [x] Checked
1082",
1083        )
1084        .await;
1085
1086        assert_eq!(
1087            parsed.children,
1088            vec![
1089                list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1090                list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
1091            ],
1092        );
1093    }
1094
1095    #[gpui::test]
1096    async fn test_list_with_indented_task() {
1097        let parsed = parse(
1098            "\
1099- [ ] TODO
1100  - [x] Checked
1101  - Unordered
1102  1. Number 1
1103  1. Number 2
11041. Number A
1105",
1106        )
1107        .await;
1108
1109        assert_eq!(
1110            parsed.children,
1111            vec![
1112                list_item(0..12, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1113                list_item(13..26, 2, Task(true, 15..18), vec![p("Checked", 19..26)]),
1114                list_item(29..40, 2, Unordered, vec![p("Unordered", 31..40)]),
1115                list_item(43..54, 2, Ordered(1), vec![p("Number 1", 46..54)]),
1116                list_item(57..68, 2, Ordered(2), vec![p("Number 2", 60..68)]),
1117                list_item(69..80, 1, Ordered(1), vec![p("Number A", 72..80)]),
1118            ],
1119        );
1120    }
1121
1122    #[gpui::test]
1123    async fn test_list_with_linebreak_is_handled_correctly() {
1124        let parsed = parse(
1125            "\
1126- [ ] Task 1
1127
1128- [x] Task 2
1129",
1130        )
1131        .await;
1132
1133        assert_eq!(
1134            parsed.children,
1135            vec![
1136                list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
1137                list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
1138            ],
1139        );
1140    }
1141
1142    #[gpui::test]
1143    async fn test_list_nested() {
1144        let parsed = parse(
1145            "\
1146* Item 1
1147* Item 2
1148* Item 3
1149
11501. Hello
11511. Two
1152   1. Three
11532. Four
11543. Five
1155
1156* First
1157  1. Hello
1158     1. Goodbyte
1159        - Inner
1160        - Inner
1161  2. Goodbyte
1162        - Next item empty
1163        -
1164* Last
1165",
1166        )
1167        .await;
1168
1169        assert_eq!(
1170            parsed.children,
1171            vec![
1172                list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1173                list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1174                list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
1175                list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
1176                list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
1177                list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
1178                list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
1179                list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
1180                list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
1181                list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
1182                list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
1183                list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
1184                list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
1185                list_item(143..159, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
1186                list_item(160..180, 3, Unordered, vec![p("Next item empty", 165..180)]),
1187                list_item(186..190, 3, Unordered, vec![]),
1188                list_item(191..197, 1, Unordered, vec![p("Last", 193..197)]),
1189            ]
1190        );
1191    }
1192
1193    #[gpui::test]
1194    async fn test_list_with_nested_content() {
1195        let parsed = parse(
1196            "\
1197*   This is a list item with two paragraphs.
1198
1199    This is the second paragraph in the list item.
1200",
1201        )
1202        .await;
1203
1204        assert_eq!(
1205            parsed.children,
1206            vec![list_item(
1207                0..96,
1208                1,
1209                Unordered,
1210                vec![
1211                    p("This is a list item with two paragraphs.", 4..44),
1212                    p("This is the second paragraph in the list item.", 50..97)
1213                ],
1214            ),],
1215        );
1216    }
1217
1218    #[gpui::test]
1219    async fn test_list_item_with_inline_html() {
1220        let parsed = parse(
1221            "\
1222*   This is a list item with an inline HTML <sometag>tag</sometag>.
1223",
1224        )
1225        .await;
1226
1227        assert_eq!(
1228            parsed.children,
1229            vec![list_item(
1230                0..67,
1231                1,
1232                Unordered,
1233                vec![p("This is a list item with an inline HTML tag.", 4..44),],
1234            ),],
1235        );
1236    }
1237
1238    #[gpui::test]
1239    async fn test_nested_list_with_paragraph_inside() {
1240        let parsed = parse(
1241            "\
12421. a
1243    1. b
1244        1. c
1245
1246    text
1247
1248    1. d
1249",
1250        )
1251        .await;
1252
1253        assert_eq!(
1254            parsed.children,
1255            vec![
1256                list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
1257                list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
1258                list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
1259                p("text", 32..37),
1260                list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
1261            ],
1262        );
1263    }
1264
1265    #[gpui::test]
1266    async fn test_list_with_leading_text() {
1267        let parsed = parse(
1268            "\
1269* `code`
1270* **bold**
1271* [link](https://example.com)
1272",
1273        )
1274        .await;
1275
1276        assert_eq!(
1277            parsed.children,
1278            vec![
1279                list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
1280                list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
1281                list_item(20..49, 1, Unordered, vec![p("link", 22..49)],),
1282            ],
1283        );
1284    }
1285
1286    #[gpui::test]
1287    async fn test_simple_block_quote() {
1288        let parsed = parse("> Simple block quote with **styled text**").await;
1289
1290        assert_eq!(
1291            parsed.children,
1292            vec![block_quote(
1293                vec![p("Simple block quote with styled text", 2..41)],
1294                0..41
1295            )]
1296        );
1297    }
1298
1299    #[gpui::test]
1300    async fn test_simple_block_quote_with_multiple_lines() {
1301        let parsed = parse(
1302            "\
1303> # Heading
1304> More
1305> text
1306>
1307> More text
1308",
1309        )
1310        .await;
1311
1312        assert_eq!(
1313            parsed.children,
1314            vec![block_quote(
1315                vec![
1316                    h1(text("Heading", 4..11), 2..12),
1317                    p("More text", 14..26),
1318                    p("More text", 30..40)
1319                ],
1320                0..40
1321            )]
1322        );
1323    }
1324
1325    #[gpui::test]
1326    async fn test_nested_block_quote() {
1327        let parsed = parse(
1328            "\
1329> A
1330>
1331> > # B
1332>
1333> C
1334
1335More text
1336",
1337        )
1338        .await;
1339
1340        assert_eq!(
1341            parsed.children,
1342            vec![
1343                block_quote(
1344                    vec![
1345                        p("A", 2..4),
1346                        block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
1347                        p("C", 18..20)
1348                    ],
1349                    0..20
1350                ),
1351                p("More text", 21..31)
1352            ]
1353        );
1354    }
1355
1356    #[gpui::test]
1357    async fn test_code_block() {
1358        let parsed = parse(
1359            "\
1360```
1361fn main() {
1362    return 0;
1363}
1364```
1365",
1366        )
1367        .await;
1368
1369        assert_eq!(
1370            parsed.children,
1371            vec![code_block(
1372                None,
1373                "fn main() {\n    return 0;\n}",
1374                0..35,
1375                None
1376            )]
1377        );
1378    }
1379
1380    #[gpui::test]
1381    async fn test_code_block_with_language(executor: BackgroundExecutor) {
1382        let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
1383        language_registry.add(rust_lang());
1384
1385        let parsed = parse_markdown(
1386            "\
1387```rust
1388fn main() {
1389    return 0;
1390}
1391```
1392",
1393            None,
1394            Some(language_registry),
1395        )
1396        .await;
1397
1398        assert_eq!(
1399            parsed.children,
1400            vec![code_block(
1401                Some("rust".to_string()),
1402                "fn main() {\n    return 0;\n}",
1403                0..39,
1404                Some(vec![])
1405            )]
1406        );
1407    }
1408
1409    fn rust_lang() -> Arc<Language> {
1410        Arc::new(Language::new(
1411            LanguageConfig {
1412                name: "Rust".into(),
1413                matcher: LanguageMatcher {
1414                    path_suffixes: vec!["rs".into()],
1415                    ..Default::default()
1416                },
1417                collapsed_placeholder: " /* ... */ ".to_string(),
1418                ..Default::default()
1419            },
1420            Some(tree_sitter_rust::LANGUAGE.into()),
1421        ))
1422    }
1423
1424    fn h1(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1425        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1426            source_range,
1427            level: HeadingLevel::H1,
1428            contents,
1429        })
1430    }
1431
1432    fn h2(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1433        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1434            source_range,
1435            level: HeadingLevel::H2,
1436            contents,
1437        })
1438    }
1439
1440    fn h3(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1441        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1442            source_range,
1443            level: HeadingLevel::H3,
1444            contents,
1445        })
1446    }
1447
1448    fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
1449        ParsedMarkdownElement::Paragraph(text(contents, source_range))
1450    }
1451
1452    fn text(contents: &str, source_range: Range<usize>) -> MarkdownParagraph {
1453        vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1454            highlights: Vec::new(),
1455            region_ranges: Vec::new(),
1456            regions: Vec::new(),
1457            source_range,
1458            contents: contents.to_string(),
1459        })]
1460    }
1461
1462    fn block_quote(
1463        children: Vec<ParsedMarkdownElement>,
1464        source_range: Range<usize>,
1465    ) -> ParsedMarkdownElement {
1466        ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
1467            source_range,
1468            children,
1469        })
1470    }
1471
1472    fn code_block(
1473        language: Option<String>,
1474        code: &str,
1475        source_range: Range<usize>,
1476        highlights: Option<Vec<(Range<usize>, HighlightId)>>,
1477    ) -> ParsedMarkdownElement {
1478        ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
1479            source_range,
1480            language,
1481            contents: code.to_string().into(),
1482            highlights,
1483        })
1484    }
1485
1486    fn list_item(
1487        source_range: Range<usize>,
1488        depth: u16,
1489        item_type: ParsedMarkdownListItemType,
1490        content: Vec<ParsedMarkdownElement>,
1491    ) -> ParsedMarkdownElement {
1492        ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
1493            source_range,
1494            item_type,
1495            depth,
1496            content,
1497        })
1498    }
1499
1500    fn table(
1501        source_range: Range<usize>,
1502        header: ParsedMarkdownTableRow,
1503        body: Vec<ParsedMarkdownTableRow>,
1504    ) -> ParsedMarkdownTable {
1505        ParsedMarkdownTable {
1506            column_alignments: Vec::new(),
1507            source_range,
1508            header,
1509            body,
1510        }
1511    }
1512
1513    fn row(children: Vec<MarkdownParagraph>) -> ParsedMarkdownTableRow {
1514        ParsedMarkdownTableRow { children }
1515    }
1516
1517    impl PartialEq for ParsedMarkdownTable {
1518        fn eq(&self, other: &Self) -> bool {
1519            self.source_range == other.source_range
1520                && self.header == other.header
1521                && self.body == other.body
1522        }
1523    }
1524
1525    impl PartialEq for ParsedMarkdownText {
1526        fn eq(&self, other: &Self) -> bool {
1527            self.source_range == other.source_range && self.contents == other.contents
1528        }
1529    }
1530}