markdown_parser.rs

   1use crate::markdown_elements::*;
   2use async_recursion::async_recursion;
   3use collections::FxHashMap;
   4use gpui::FontWeight;
   5use language::LanguageRegistry;
   6use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
   7use std::{ops::Range, path::PathBuf, sync::Arc, vec};
   8
   9pub async fn parse_markdown(
  10    markdown_input: &str,
  11    file_location_directory: Option<PathBuf>,
  12    language_registry: Option<Arc<LanguageRegistry>>,
  13) -> ParsedMarkdown {
  14    let mut options = Options::all();
  15    options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
  16
  17    let parser = Parser::new_ext(markdown_input, options);
  18    let parser = MarkdownParser::new(
  19        parser.into_offset_iter().collect(),
  20        file_location_directory,
  21        language_registry,
  22    );
  23    let renderer = parser.parse_document().await;
  24    ParsedMarkdown {
  25        children: renderer.parsed,
  26    }
  27}
  28
/// Cursor-based parser over a pre-collected pulldown-cmark event stream.
struct MarkdownParser<'a> {
    /// Every event of the document, paired with its byte range in the source
    tokens: Vec<(Event<'a>, Range<usize>)>,
    /// The current index in the tokens array
    cursor: usize,
    /// The blocks that we have successfully parsed so far
    parsed: Vec<ParsedMarkdownElement>,
    /// Handed to `Link::identify` and `Image::identify` when link and image
    /// destinations are resolved
    file_location_directory: Option<PathBuf>,
    /// Used to look up a language for fenced code blocks so their contents
    /// can be highlighted; no highlighting is attempted when `None`
    language_registry: Option<Arc<LanguageRegistry>>,
}
  38
/// A list item that is still being assembled while its events are parsed.
struct MarkdownListItem {
    /// Blocks collected for this item so far
    content: Vec<ParsedMarkdownElement>,
    /// Whether the item is unordered, ordered or a task
    item_type: ParsedMarkdownListItemType,
}
  43
  44impl Default for MarkdownListItem {
  45    fn default() -> Self {
  46        Self {
  47            content: Vec::new(),
  48            item_type: ParsedMarkdownListItemType::Unordered,
  49        }
  50    }
  51}
  52
  53impl<'a> MarkdownParser<'a> {
  54    fn new(
  55        tokens: Vec<(Event<'a>, Range<usize>)>,
  56        file_location_directory: Option<PathBuf>,
  57        language_registry: Option<Arc<LanguageRegistry>>,
  58    ) -> Self {
  59        Self {
  60            tokens,
  61            file_location_directory,
  62            language_registry,
  63            cursor: 0,
  64            parsed: vec![],
  65        }
  66    }
  67
  68    fn eof(&self) -> bool {
  69        if self.tokens.is_empty() {
  70            return true;
  71        }
  72        self.cursor >= self.tokens.len() - 1
  73    }
  74
  75    fn peek(&self, steps: usize) -> Option<&(Event<'_>, Range<usize>)> {
  76        if self.eof() || (steps + self.cursor) >= self.tokens.len() {
  77            return self.tokens.last();
  78        }
  79        self.tokens.get(self.cursor + steps)
  80    }
  81
  82    fn previous(&self) -> Option<&(Event<'_>, Range<usize>)> {
  83        if self.cursor == 0 || self.cursor > self.tokens.len() {
  84            return None;
  85        }
  86        self.tokens.get(self.cursor - 1)
  87    }
  88
  89    fn current(&self) -> Option<&(Event<'_>, Range<usize>)> {
  90        self.peek(0)
  91    }
  92
  93    fn current_event(&self) -> Option<&Event<'_>> {
  94        self.current().map(|(event, _)| event)
  95    }
  96
  97    fn is_text_like(event: &Event) -> bool {
  98        match event {
  99            Event::Text(_)
 100            // Represent an inline code block
 101            | Event::Code(_)
 102            | Event::Html(_)
 103            | Event::InlineHtml(_)
 104            | Event::FootnoteReference(_)
 105            | Event::Start(Tag::Link { .. })
 106            | Event::Start(Tag::Emphasis)
 107            | Event::Start(Tag::Strong)
 108            | Event::Start(Tag::Strikethrough)
 109            | Event::Start(Tag::Image { .. }) => {
 110                true
 111            }
 112            _ => false,
 113        }
 114    }
 115
 116    async fn parse_document(mut self) -> Self {
 117        while !self.eof() {
 118            if let Some(block) = self.parse_block().await {
 119                self.parsed.extend(block);
 120            } else {
 121                self.cursor += 1;
 122            }
 123        }
 124        self
 125    }
 126
 127    #[async_recursion]
 128    async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
 129        let (current, source_range) = self.current().unwrap();
 130        let source_range = source_range.clone();
 131        match current {
 132            Event::Start(tag) => match tag {
 133                Tag::Paragraph => {
 134                    self.cursor += 1;
 135                    let text = self.parse_text(false, Some(source_range));
 136                    Some(vec![ParsedMarkdownElement::Paragraph(text)])
 137                }
 138                Tag::Heading { level, .. } => {
 139                    let level = *level;
 140                    self.cursor += 1;
 141                    let heading = self.parse_heading(level);
 142                    Some(vec![ParsedMarkdownElement::Heading(heading)])
 143                }
 144                Tag::Table(alignment) => {
 145                    let alignment = alignment.clone();
 146                    self.cursor += 1;
 147                    let table = self.parse_table(alignment);
 148                    Some(vec![ParsedMarkdownElement::Table(table)])
 149                }
 150                Tag::List(order) => {
 151                    let order = *order;
 152                    self.cursor += 1;
 153                    let list = self.parse_list(order).await;
 154                    Some(list)
 155                }
 156                Tag::BlockQuote(_kind) => {
 157                    self.cursor += 1;
 158                    let block_quote = self.parse_block_quote().await;
 159                    Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
 160                }
 161                Tag::CodeBlock(kind) => {
 162                    let language = match kind {
 163                        pulldown_cmark::CodeBlockKind::Indented => None,
 164                        pulldown_cmark::CodeBlockKind::Fenced(language) => {
 165                            if language.is_empty() {
 166                                None
 167                            } else {
 168                                Some(language.to_string())
 169                            }
 170                        }
 171                    };
 172
 173                    self.cursor += 1;
 174
 175                    let code_block = self.parse_code_block(language).await;
 176                    Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
 177                }
 178                _ => None,
 179            },
 180            Event::Rule => {
 181                self.cursor += 1;
 182                Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
 183            }
 184            _ => None,
 185        }
 186    }
 187
 188    fn parse_text(
 189        &mut self,
 190        should_complete_on_soft_break: bool,
 191        source_range: Option<Range<usize>>,
 192    ) -> MarkdownParagraph {
 193        let source_range = source_range.unwrap_or_else(|| {
 194            self.current()
 195                .map(|(_, range)| range.clone())
 196                .unwrap_or_default()
 197        });
 198
 199        let mut markdown_text_like = Vec::new();
 200        let mut text = String::new();
 201        let mut bold_depth = 0;
 202        let mut italic_depth = 0;
 203        let mut strikethrough_depth = 0;
 204        let mut link: Option<Link> = None;
 205        let mut image: Option<Image> = None;
 206        let mut region_ranges: Vec<Range<usize>> = vec![];
 207        let mut regions: Vec<ParsedRegion> = vec![];
 208        let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
 209        let mut link_urls: Vec<String> = vec![];
 210        let mut link_ranges: Vec<Range<usize>> = vec![];
 211
 212        loop {
 213            if self.eof() {
 214                break;
 215            }
 216
 217            let (current, _) = self.current().unwrap();
 218            let prev_len = text.len();
 219            match current {
 220                Event::SoftBreak => {
 221                    if should_complete_on_soft_break {
 222                        break;
 223                    }
 224                    text.push(' ');
 225                }
 226
 227                Event::HardBreak => {
 228                    text.push('\n');
 229                }
 230
 231                // We want to ignore any inline HTML tags in the text but keep
 232                // the text between them
 233                Event::InlineHtml(_) => {}
 234
 235                Event::Text(t) => {
 236                    text.push_str(t.as_ref());
 237                    let mut style = MarkdownHighlightStyle::default();
 238
 239                    if bold_depth > 0 {
 240                        style.weight = FontWeight::BOLD;
 241                    }
 242
 243                    if italic_depth > 0 {
 244                        style.italic = true;
 245                    }
 246
 247                    if strikethrough_depth > 0 {
 248                        style.strikethrough = true;
 249                    }
 250
 251                    let last_run_len = if let Some(link) = link.clone() {
 252                        region_ranges.push(prev_len..text.len());
 253                        regions.push(ParsedRegion {
 254                            code: false,
 255                            link: Some(link),
 256                        });
 257                        style.underline = true;
 258                        prev_len
 259                    } else {
 260                        // Manually scan for links
 261                        let mut finder = linkify::LinkFinder::new();
 262                        finder.kinds(&[linkify::LinkKind::Url]);
 263                        let mut last_link_len = prev_len;
 264                        for link in finder.links(t) {
 265                            let start = link.start();
 266                            let end = link.end();
 267                            let range = (prev_len + start)..(prev_len + end);
 268                            link_ranges.push(range.clone());
 269                            link_urls.push(link.as_str().to_string());
 270
 271                            // If there is a style before we match a link, we have to add this to the highlighted ranges
 272                            if style != MarkdownHighlightStyle::default()
 273                                && last_link_len < link.start()
 274                            {
 275                                highlights.push((
 276                                    last_link_len..link.start(),
 277                                    MarkdownHighlight::Style(style.clone()),
 278                                ));
 279                            }
 280
 281                            highlights.push((
 282                                range.clone(),
 283                                MarkdownHighlight::Style(MarkdownHighlightStyle {
 284                                    underline: true,
 285                                    ..style
 286                                }),
 287                            ));
 288                            region_ranges.push(range.clone());
 289                            regions.push(ParsedRegion {
 290                                code: false,
 291                                link: Some(Link::Web {
 292                                    url: link.as_str().to_string(),
 293                                }),
 294                            });
 295                            last_link_len = end;
 296                        }
 297                        last_link_len
 298                    };
 299
 300                    if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
 301                        let mut new_highlight = true;
 302                        if let Some((last_range, last_style)) = highlights.last_mut()
 303                            && last_range.end == last_run_len
 304                            && last_style == &MarkdownHighlight::Style(style.clone())
 305                        {
 306                            last_range.end = text.len();
 307                            new_highlight = false;
 308                        }
 309                        if new_highlight {
 310                            highlights.push((
 311                                last_run_len..text.len(),
 312                                MarkdownHighlight::Style(style.clone()),
 313                            ));
 314                        }
 315                    }
 316                }
 317                Event::Code(t) => {
 318                    text.push_str(t.as_ref());
 319                    region_ranges.push(prev_len..text.len());
 320
 321                    if link.is_some() {
 322                        highlights.push((
 323                            prev_len..text.len(),
 324                            MarkdownHighlight::Style(MarkdownHighlightStyle {
 325                                underline: true,
 326                                ..Default::default()
 327                            }),
 328                        ));
 329                    }
 330                    regions.push(ParsedRegion {
 331                        code: true,
 332                        link: link.clone(),
 333                    });
 334                }
 335                Event::Start(tag) => match tag {
 336                    Tag::Emphasis => italic_depth += 1,
 337                    Tag::Strong => bold_depth += 1,
 338                    Tag::Strikethrough => strikethrough_depth += 1,
 339                    Tag::Link { dest_url, .. } => {
 340                        link = Link::identify(
 341                            self.file_location_directory.clone(),
 342                            dest_url.to_string(),
 343                        );
 344                    }
 345                    Tag::Image { dest_url, .. } => {
 346                        if !text.is_empty() {
 347                            let parsed_regions = MarkdownParagraphChunk::Text(ParsedMarkdownText {
 348                                source_range: source_range.clone(),
 349                                contents: text.clone(),
 350                                highlights: highlights.clone(),
 351                                region_ranges: region_ranges.clone(),
 352                                regions: regions.clone(),
 353                            });
 354                            text = String::new();
 355                            highlights = vec![];
 356                            region_ranges = vec![];
 357                            regions = vec![];
 358                            markdown_text_like.push(parsed_regions);
 359                        }
 360                        image = Image::identify(
 361                            dest_url.to_string(),
 362                            source_range.clone(),
 363                            self.file_location_directory.clone(),
 364                        );
 365                    }
 366                    _ => {
 367                        break;
 368                    }
 369                },
 370
 371                Event::End(tag) => match tag {
 372                    TagEnd::Emphasis => italic_depth -= 1,
 373                    TagEnd::Strong => bold_depth -= 1,
 374                    TagEnd::Strikethrough => strikethrough_depth -= 1,
 375                    TagEnd::Link => {
 376                        link = None;
 377                    }
 378                    TagEnd::Image => {
 379                        if let Some(mut image) = image.take() {
 380                            if !text.is_empty() {
 381                                image.alt_text = Some(std::mem::take(&mut text).into());
 382                            }
 383                            markdown_text_like.push(MarkdownParagraphChunk::Image(image));
 384                        }
 385                    }
 386                    TagEnd::Paragraph => {
 387                        self.cursor += 1;
 388                        break;
 389                    }
 390                    _ => {
 391                        break;
 392                    }
 393                },
 394                _ => {
 395                    break;
 396                }
 397            }
 398
 399            self.cursor += 1;
 400        }
 401        if !text.is_empty() {
 402            markdown_text_like.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
 403                source_range,
 404                contents: text,
 405                highlights,
 406                regions,
 407                region_ranges,
 408            }));
 409        }
 410        markdown_text_like
 411    }
 412
 413    fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
 414        let (_event, source_range) = self.previous().unwrap();
 415        let source_range = source_range.clone();
 416        let text = self.parse_text(true, None);
 417
 418        // Advance past the heading end tag
 419        self.cursor += 1;
 420
 421        ParsedMarkdownHeading {
 422            source_range,
 423            level: match level {
 424                pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
 425                pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
 426                pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
 427                pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
 428                pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
 429                pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
 430            },
 431            contents: text,
 432        }
 433    }
 434
 435    fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
 436        let (_event, source_range) = self.previous().unwrap();
 437        let source_range = source_range.clone();
 438        let mut header = ParsedMarkdownTableRow::new();
 439        let mut body = vec![];
 440        let mut current_row = vec![];
 441        let mut in_header = true;
 442        let column_alignments = alignment.iter().map(Self::convert_alignment).collect();
 443
 444        loop {
 445            if self.eof() {
 446                break;
 447            }
 448
 449            let (current, source_range) = self.current().unwrap();
 450            let source_range = source_range.clone();
 451            match current {
 452                Event::Start(Tag::TableHead)
 453                | Event::Start(Tag::TableRow)
 454                | Event::End(TagEnd::TableCell) => {
 455                    self.cursor += 1;
 456                }
 457                Event::Start(Tag::TableCell) => {
 458                    self.cursor += 1;
 459                    let cell_contents = self.parse_text(false, Some(source_range));
 460                    current_row.push(cell_contents);
 461                }
 462                Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
 463                    self.cursor += 1;
 464                    let new_row = std::mem::take(&mut current_row);
 465                    if in_header {
 466                        header.children = new_row;
 467                        in_header = false;
 468                    } else {
 469                        let row = ParsedMarkdownTableRow::with_children(new_row);
 470                        body.push(row);
 471                    }
 472                }
 473                Event::End(TagEnd::Table) => {
 474                    self.cursor += 1;
 475                    break;
 476                }
 477                _ => {
 478                    break;
 479                }
 480            }
 481        }
 482
 483        ParsedMarkdownTable {
 484            source_range,
 485            header,
 486            body,
 487            column_alignments,
 488        }
 489    }
 490
 491    fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
 492        match alignment {
 493            Alignment::None => ParsedMarkdownTableAlignment::None,
 494            Alignment::Left => ParsedMarkdownTableAlignment::Left,
 495            Alignment::Center => ParsedMarkdownTableAlignment::Center,
 496            Alignment::Right => ParsedMarkdownTableAlignment::Right,
 497        }
 498    }
 499
 500    async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
 501        let (_, list_source_range) = self.previous().unwrap();
 502
 503        let mut items = Vec::new();
 504        let mut items_stack = vec![MarkdownListItem::default()];
 505        let mut depth = 1;
 506        let mut order = order;
 507        let mut order_stack = Vec::new();
 508
 509        let mut insertion_indices = FxHashMap::default();
 510        let mut source_ranges = FxHashMap::default();
 511        let mut start_item_range = list_source_range.clone();
 512
 513        while !self.eof() {
 514            let (current, source_range) = self.current().unwrap();
 515            match current {
 516                Event::Start(Tag::List(new_order)) => {
 517                    if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
 518                        insertion_indices.insert(depth, items.len());
 519                    }
 520
 521                    // We will use the start of the nested list as the end for the current item's range,
 522                    // because we don't care about the hierarchy of list items
 523                    if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
 524                        e.insert(start_item_range.start..source_range.start);
 525                    }
 526
 527                    order_stack.push(order);
 528                    order = *new_order;
 529                    self.cursor += 1;
 530                    depth += 1;
 531                }
 532                Event::End(TagEnd::List(_)) => {
 533                    order = order_stack.pop().flatten();
 534                    self.cursor += 1;
 535                    depth -= 1;
 536
 537                    if depth == 0 {
 538                        break;
 539                    }
 540                }
 541                Event::Start(Tag::Item) => {
 542                    start_item_range = source_range.clone();
 543
 544                    self.cursor += 1;
 545                    items_stack.push(MarkdownListItem::default());
 546
 547                    let mut task_list = None;
 548                    // Check for task list marker (`- [ ]` or `- [x]`)
 549                    if let Some(event) = self.current_event() {
 550                        // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
 551                        if event == &Event::Start(Tag::Paragraph) {
 552                            self.cursor += 1;
 553                        }
 554
 555                        if let Some((Event::TaskListMarker(checked), range)) = self.current() {
 556                            task_list = Some((*checked, range.clone()));
 557                            self.cursor += 1;
 558                        }
 559                    }
 560
 561                    if let Some((event, range)) = self.current() {
 562                        // This is a plain list item.
 563                        // For example `- some text` or `1. [Docs](./docs.md)`
 564                        if MarkdownParser::is_text_like(event) {
 565                            let text = self.parse_text(false, Some(range.clone()));
 566                            let block = ParsedMarkdownElement::Paragraph(text);
 567                            if let Some(content) = items_stack.last_mut() {
 568                                let item_type = if let Some((checked, range)) = task_list {
 569                                    ParsedMarkdownListItemType::Task(checked, range)
 570                                } else if let Some(order) = order {
 571                                    ParsedMarkdownListItemType::Ordered(order)
 572                                } else {
 573                                    ParsedMarkdownListItemType::Unordered
 574                                };
 575                                content.item_type = item_type;
 576                                content.content.push(block);
 577                            }
 578                        } else {
 579                            let block = self.parse_block().await;
 580                            if let Some(block) = block
 581                                && let Some(list_item) = items_stack.last_mut()
 582                            {
 583                                list_item.content.extend(block);
 584                            }
 585                        }
 586                    }
 587
 588                    // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
 589                    if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
 590                        self.cursor += 1;
 591                    }
 592                }
 593                Event::End(TagEnd::Item) => {
 594                    self.cursor += 1;
 595
 596                    if let Some(current) = order {
 597                        order = Some(current + 1);
 598                    }
 599
 600                    if let Some(list_item) = items_stack.pop() {
 601                        let source_range = source_ranges
 602                            .remove(&depth)
 603                            .unwrap_or(start_item_range.clone());
 604
 605                        // We need to remove the last character of the source range, because it includes the newline character
 606                        let source_range = source_range.start..source_range.end - 1;
 607                        let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
 608                            source_range,
 609                            content: list_item.content,
 610                            depth,
 611                            item_type: list_item.item_type,
 612                        });
 613
 614                        if let Some(index) = insertion_indices.get(&depth) {
 615                            items.insert(*index, item);
 616                            insertion_indices.remove(&depth);
 617                        } else {
 618                            items.push(item);
 619                        }
 620                    }
 621                }
 622                _ => {
 623                    if depth == 0 {
 624                        break;
 625                    }
 626                    // This can only happen if a list item starts with more then one paragraph,
 627                    // or the list item contains blocks that should be rendered after the nested list items
 628                    let block = self.parse_block().await;
 629                    if let Some(block) = block {
 630                        if let Some(list_item) = items_stack.last_mut() {
 631                            // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
 632                            if !insertion_indices.contains_key(&depth) {
 633                                list_item.content.extend(block);
 634                                continue;
 635                            }
 636                        }
 637
 638                        // Otherwise we need to insert the block after all the nested items
 639                        // that have been parsed so far
 640                        items.extend(block);
 641                    } else {
 642                        self.cursor += 1;
 643                    }
 644                }
 645            }
 646        }
 647
 648        items
 649    }
 650
 651    #[async_recursion]
 652    async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
 653        let (_event, source_range) = self.previous().unwrap();
 654        let source_range = source_range.clone();
 655        let mut nested_depth = 1;
 656
 657        let mut children: Vec<ParsedMarkdownElement> = vec![];
 658
 659        while !self.eof() {
 660            let block = self.parse_block().await;
 661
 662            if let Some(block) = block {
 663                children.extend(block);
 664            } else {
 665                break;
 666            }
 667
 668            if self.eof() {
 669                break;
 670            }
 671
 672            let (current, _source_range) = self.current().unwrap();
 673            match current {
 674                // This is a nested block quote.
 675                // Record that we're in a nested block quote and continue parsing.
 676                // We don't need to advance the cursor since the next
 677                // call to `parse_block` will handle it.
 678                Event::Start(Tag::BlockQuote(_kind)) => {
 679                    nested_depth += 1;
 680                }
 681                Event::End(TagEnd::BlockQuote(_kind)) => {
 682                    nested_depth -= 1;
 683                    if nested_depth == 0 {
 684                        self.cursor += 1;
 685                        break;
 686                    }
 687                }
 688                _ => {}
 689            };
 690        }
 691
 692        ParsedMarkdownBlockQuote {
 693            source_range,
 694            children,
 695        }
 696    }
 697
 698    async fn parse_code_block(&mut self, language: Option<String>) -> ParsedMarkdownCodeBlock {
 699        let (_event, source_range) = self.previous().unwrap();
 700        let source_range = source_range.clone();
 701        let mut code = String::new();
 702
 703        while !self.eof() {
 704            let (current, _source_range) = self.current().unwrap();
 705            match current {
 706                Event::Text(text) => {
 707                    code.push_str(text);
 708                    self.cursor += 1;
 709                }
 710                Event::End(TagEnd::CodeBlock) => {
 711                    self.cursor += 1;
 712                    break;
 713                }
 714                _ => {
 715                    break;
 716                }
 717            }
 718        }
 719
 720        code = code.strip_suffix('\n').unwrap_or(&code).to_string();
 721
 722        let highlights = if let Some(language) = &language {
 723            if let Some(registry) = &self.language_registry {
 724                let rope: language::Rope = code.as_str().into();
 725                registry
 726                    .language_for_name_or_extension(language)
 727                    .await
 728                    .map(|l| l.highlight_text(&rope, 0..code.len()))
 729                    .ok()
 730            } else {
 731                None
 732            }
 733        } else {
 734            None
 735        };
 736
 737        ParsedMarkdownCodeBlock {
 738            source_range,
 739            contents: code.into(),
 740            language,
 741            highlights,
 742        }
 743    }
 744}
 745
 746#[cfg(test)]
 747mod tests {
 748    use core::panic;
 749
 750    use super::*;
 751
 752    use ParsedMarkdownListItemType::*;
 753    use gpui::BackgroundExecutor;
 754    use language::{
 755        HighlightId, Language, LanguageConfig, LanguageMatcher, LanguageRegistry, tree_sitter_rust,
 756    };
 757    use pretty_assertions::assert_eq;
 758
 759    async fn parse(input: &str) -> ParsedMarkdown {
 760        parse_markdown(input, None, None).await
 761    }
 762
 763    #[gpui::test]
 764    async fn test_headings() {
 765        let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
 766
 767        assert_eq!(
 768            parsed.children,
 769            vec![
 770                h1(text("Heading one", 2..13), 0..14),
 771                h2(text("Heading two", 17..28), 14..29),
 772                h3(text("Heading three", 33..46), 29..46),
 773            ]
 774        );
 775    }
 776
 777    #[gpui::test]
 778    async fn test_newlines_dont_new_paragraphs() {
 779        let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
 780
 781        assert_eq!(
 782            parsed.children,
 783            vec![p("Some text that is bolded and italicized", 0..46)]
 784        );
 785    }
 786
 787    #[gpui::test]
 788    async fn test_heading_with_paragraph() {
 789        let parsed = parse("# Zed\nThe editor").await;
 790
 791        assert_eq!(
 792            parsed.children,
 793            vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
 794        );
 795    }
 796
 797    #[gpui::test]
 798    async fn test_double_newlines_do_new_paragraphs() {
 799        let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
 800
 801        assert_eq!(
 802            parsed.children,
 803            vec![
 804                p("Some text that is bolded", 0..29),
 805                p("and italicized", 31..47),
 806            ]
 807        );
 808    }
 809
 810    #[gpui::test]
 811    async fn test_bold_italic_text() {
 812        let parsed = parse("Some text **that is bolded** and *italicized*").await;
 813
 814        assert_eq!(
 815            parsed.children,
 816            vec![p("Some text that is bolded and italicized", 0..45)]
 817        );
 818    }
 819
 820    #[gpui::test]
 821    async fn test_nested_bold_strikethrough_text() {
 822        let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
 823
 824        assert_eq!(parsed.children.len(), 1);
 825        assert_eq!(
 826            parsed.children[0],
 827            ParsedMarkdownElement::Paragraph(vec![MarkdownParagraphChunk::Text(
 828                ParsedMarkdownText {
 829                    source_range: 0..35,
 830                    contents: "Some bostrikethroughld text".to_string(),
 831                    highlights: Vec::new(),
 832                    region_ranges: Vec::new(),
 833                    regions: Vec::new(),
 834                }
 835            )])
 836        );
 837
 838        let new_text = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 839            text
 840        } else {
 841            panic!("Expected a paragraph");
 842        };
 843
 844        let paragraph = if let MarkdownParagraphChunk::Text(text) = &new_text[0] {
 845            text
 846        } else {
 847            panic!("Expected a text");
 848        };
 849
 850        assert_eq!(
 851            paragraph.highlights,
 852            vec![
 853                (
 854                    5..7,
 855                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 856                        weight: FontWeight::BOLD,
 857                        ..Default::default()
 858                    }),
 859                ),
 860                (
 861                    7..20,
 862                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 863                        weight: FontWeight::BOLD,
 864                        strikethrough: true,
 865                        ..Default::default()
 866                    }),
 867                ),
 868                (
 869                    20..22,
 870                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 871                        weight: FontWeight::BOLD,
 872                        ..Default::default()
 873                    }),
 874                ),
 875            ]
 876        );
 877    }
 878
 879    #[gpui::test]
 880    async fn test_text_with_inline_html() {
 881        let parsed = parse("This is a paragraph with an inline HTML <sometag>tag</sometag>.").await;
 882
 883        assert_eq!(
 884            parsed.children,
 885            vec![p("This is a paragraph with an inline HTML tag.", 0..63),],
 886        );
 887    }
 888
 889    #[gpui::test]
 890    async fn test_raw_links_detection() {
 891        let parsed = parse("Checkout this https://zed.dev link").await;
 892
 893        assert_eq!(
 894            parsed.children,
 895            vec![p("Checkout this https://zed.dev link", 0..34)]
 896        );
 897    }
 898
 899    #[gpui::test]
 900    async fn test_empty_image() {
 901        let parsed = parse("![]()").await;
 902
 903        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 904            text
 905        } else {
 906            panic!("Expected a paragraph");
 907        };
 908        assert_eq!(paragraph.len(), 0);
 909    }
 910
 911    #[gpui::test]
 912    async fn test_image_links_detection() {
 913        let parsed = parse("![test](https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png)").await;
 914
 915        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 916            text
 917        } else {
 918            panic!("Expected a paragraph");
 919        };
 920        assert_eq!(
 921            paragraph[0],
 922            MarkdownParagraphChunk::Image(Image {
 923                source_range: 0..111,
 924                link: Link::Web {
 925                    url: "https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png".to_string(),
 926                },
 927                alt_text: Some("test".into()),
 928            },)
 929        );
 930    }
 931
 932    #[gpui::test]
 933    async fn test_image_without_alt_text() {
 934        let parsed = parse("![](http://example.com/foo.png)").await;
 935
 936        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 937            text
 938        } else {
 939            panic!("Expected a paragraph");
 940        };
 941        assert_eq!(
 942            paragraph[0],
 943            MarkdownParagraphChunk::Image(Image {
 944                source_range: 0..31,
 945                link: Link::Web {
 946                    url: "http://example.com/foo.png".to_string(),
 947                },
 948                alt_text: None,
 949            },)
 950        );
 951    }
 952
 953    #[gpui::test]
 954    async fn test_image_with_alt_text_containing_formatting() {
 955        let parsed = parse("![foo *bar* baz](http://example.com/foo.png)").await;
 956
 957        let ParsedMarkdownElement::Paragraph(chunks) = &parsed.children[0] else {
 958            panic!("Expected a paragraph");
 959        };
 960        assert_eq!(
 961            chunks,
 962            &[MarkdownParagraphChunk::Image(Image {
 963                source_range: 0..44,
 964                link: Link::Web {
 965                    url: "http://example.com/foo.png".to_string(),
 966                },
 967                alt_text: Some("foo bar baz".into()),
 968            }),],
 969        );
 970    }
 971
 972    #[gpui::test]
 973    async fn test_images_with_text_in_between() {
 974        let parsed = parse(
 975            "![foo](http://example.com/foo.png)\nLorem Ipsum\n![bar](http://example.com/bar.png)",
 976        )
 977        .await;
 978
 979        let chunks = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 980            text
 981        } else {
 982            panic!("Expected a paragraph");
 983        };
 984        assert_eq!(
 985            chunks,
 986            &vec![
 987                MarkdownParagraphChunk::Image(Image {
 988                    source_range: 0..81,
 989                    link: Link::Web {
 990                        url: "http://example.com/foo.png".to_string(),
 991                    },
 992                    alt_text: Some("foo".into()),
 993                }),
 994                MarkdownParagraphChunk::Text(ParsedMarkdownText {
 995                    source_range: 0..81,
 996                    contents: " Lorem Ipsum ".to_string(),
 997                    highlights: Vec::new(),
 998                    region_ranges: Vec::new(),
 999                    regions: Vec::new(),
1000                }),
1001                MarkdownParagraphChunk::Image(Image {
1002                    source_range: 0..81,
1003                    link: Link::Web {
1004                        url: "http://example.com/bar.png".to_string(),
1005                    },
1006                    alt_text: Some("bar".into()),
1007                })
1008            ]
1009        );
1010    }
1011
1012    #[gpui::test]
1013    async fn test_header_only_table() {
1014        let markdown = "\
1015| Header 1 | Header 2 |
1016|----------|----------|
1017
1018Some other content
1019";
1020
1021        let expected_table = table(
1022            0..48,
1023            row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1024            vec![],
1025        );
1026
1027        assert_eq!(
1028            parse(markdown).await.children[0],
1029            ParsedMarkdownElement::Table(expected_table)
1030        );
1031    }
1032
1033    #[gpui::test]
1034    async fn test_basic_table() {
1035        let markdown = "\
1036| Header 1 | Header 2 |
1037|----------|----------|
1038| Cell 1   | Cell 2   |
1039| Cell 3   | Cell 4   |";
1040
1041        let expected_table = table(
1042            0..95,
1043            row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1044            vec![
1045                row(vec![text("Cell 1", 49..59), text("Cell 2", 60..70)]),
1046                row(vec![text("Cell 3", 73..83), text("Cell 4", 84..94)]),
1047            ],
1048        );
1049
1050        assert_eq!(
1051            parse(markdown).await.children[0],
1052            ParsedMarkdownElement::Table(expected_table)
1053        );
1054    }
1055
1056    #[gpui::test]
1057    async fn test_list_basic() {
1058        let parsed = parse(
1059            "\
1060* Item 1
1061* Item 2
1062* Item 3
1063",
1064        )
1065        .await;
1066
1067        assert_eq!(
1068            parsed.children,
1069            vec![
1070                list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1071                list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1072                list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
1073            ],
1074        );
1075    }
1076
1077    #[gpui::test]
1078    async fn test_list_with_tasks() {
1079        let parsed = parse(
1080            "\
1081- [ ] TODO
1082- [x] Checked
1083",
1084        )
1085        .await;
1086
1087        assert_eq!(
1088            parsed.children,
1089            vec![
1090                list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1091                list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
1092            ],
1093        );
1094    }
1095
1096    #[gpui::test]
1097    async fn test_list_with_indented_task() {
1098        let parsed = parse(
1099            "\
1100- [ ] TODO
1101  - [x] Checked
1102  - Unordered
1103  1. Number 1
1104  1. Number 2
11051. Number A
1106",
1107        )
1108        .await;
1109
1110        assert_eq!(
1111            parsed.children,
1112            vec![
1113                list_item(0..12, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1114                list_item(13..26, 2, Task(true, 15..18), vec![p("Checked", 19..26)]),
1115                list_item(29..40, 2, Unordered, vec![p("Unordered", 31..40)]),
1116                list_item(43..54, 2, Ordered(1), vec![p("Number 1", 46..54)]),
1117                list_item(57..68, 2, Ordered(2), vec![p("Number 2", 60..68)]),
1118                list_item(69..80, 1, Ordered(1), vec![p("Number A", 72..80)]),
1119            ],
1120        );
1121    }
1122
1123    #[gpui::test]
1124    async fn test_list_with_linebreak_is_handled_correctly() {
1125        let parsed = parse(
1126            "\
1127- [ ] Task 1
1128
1129- [x] Task 2
1130",
1131        )
1132        .await;
1133
1134        assert_eq!(
1135            parsed.children,
1136            vec![
1137                list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
1138                list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
1139            ],
1140        );
1141    }
1142
1143    #[gpui::test]
1144    async fn test_list_nested() {
1145        let parsed = parse(
1146            "\
1147* Item 1
1148* Item 2
1149* Item 3
1150
11511. Hello
11521. Two
1153   1. Three
11542. Four
11553. Five
1156
1157* First
1158  1. Hello
1159     1. Goodbyte
1160        - Inner
1161        - Inner
1162  2. Goodbyte
1163        - Next item empty
1164        -
1165* Last
1166",
1167        )
1168        .await;
1169
1170        assert_eq!(
1171            parsed.children,
1172            vec![
1173                list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1174                list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1175                list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
1176                list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
1177                list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
1178                list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
1179                list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
1180                list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
1181                list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
1182                list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
1183                list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
1184                list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
1185                list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
1186                list_item(143..159, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
1187                list_item(160..180, 3, Unordered, vec![p("Next item empty", 165..180)]),
1188                list_item(186..190, 3, Unordered, vec![]),
1189                list_item(191..197, 1, Unordered, vec![p("Last", 193..197)]),
1190            ]
1191        );
1192    }
1193
1194    #[gpui::test]
1195    async fn test_list_with_nested_content() {
1196        let parsed = parse(
1197            "\
1198*   This is a list item with two paragraphs.
1199
1200    This is the second paragraph in the list item.
1201",
1202        )
1203        .await;
1204
1205        assert_eq!(
1206            parsed.children,
1207            vec![list_item(
1208                0..96,
1209                1,
1210                Unordered,
1211                vec![
1212                    p("This is a list item with two paragraphs.", 4..44),
1213                    p("This is the second paragraph in the list item.", 50..97)
1214                ],
1215            ),],
1216        );
1217    }
1218
1219    #[gpui::test]
1220    async fn test_list_item_with_inline_html() {
1221        let parsed = parse(
1222            "\
1223*   This is a list item with an inline HTML <sometag>tag</sometag>.
1224",
1225        )
1226        .await;
1227
1228        assert_eq!(
1229            parsed.children,
1230            vec![list_item(
1231                0..67,
1232                1,
1233                Unordered,
1234                vec![p("This is a list item with an inline HTML tag.", 4..44),],
1235            ),],
1236        );
1237    }
1238
1239    #[gpui::test]
1240    async fn test_nested_list_with_paragraph_inside() {
1241        let parsed = parse(
1242            "\
12431. a
1244    1. b
1245        1. c
1246
1247    text
1248
1249    1. d
1250",
1251        )
1252        .await;
1253
1254        assert_eq!(
1255            parsed.children,
1256            vec![
1257                list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
1258                list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
1259                list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
1260                p("text", 32..37),
1261                list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
1262            ],
1263        );
1264    }
1265
1266    #[gpui::test]
1267    async fn test_list_with_leading_text() {
1268        let parsed = parse(
1269            "\
1270* `code`
1271* **bold**
1272* [link](https://example.com)
1273",
1274        )
1275        .await;
1276
1277        assert_eq!(
1278            parsed.children,
1279            vec![
1280                list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
1281                list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
1282                list_item(20..49, 1, Unordered, vec![p("link", 22..49)],),
1283            ],
1284        );
1285    }
1286
1287    #[gpui::test]
1288    async fn test_simple_block_quote() {
1289        let parsed = parse("> Simple block quote with **styled text**").await;
1290
1291        assert_eq!(
1292            parsed.children,
1293            vec![block_quote(
1294                vec![p("Simple block quote with styled text", 2..41)],
1295                0..41
1296            )]
1297        );
1298    }
1299
1300    #[gpui::test]
1301    async fn test_simple_block_quote_with_multiple_lines() {
1302        let parsed = parse(
1303            "\
1304> # Heading
1305> More
1306> text
1307>
1308> More text
1309",
1310        )
1311        .await;
1312
1313        assert_eq!(
1314            parsed.children,
1315            vec![block_quote(
1316                vec![
1317                    h1(text("Heading", 4..11), 2..12),
1318                    p("More text", 14..26),
1319                    p("More text", 30..40)
1320                ],
1321                0..40
1322            )]
1323        );
1324    }
1325
1326    #[gpui::test]
1327    async fn test_nested_block_quote() {
1328        let parsed = parse(
1329            "\
1330> A
1331>
1332> > # B
1333>
1334> C
1335
1336More text
1337",
1338        )
1339        .await;
1340
1341        assert_eq!(
1342            parsed.children,
1343            vec![
1344                block_quote(
1345                    vec![
1346                        p("A", 2..4),
1347                        block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
1348                        p("C", 18..20)
1349                    ],
1350                    0..20
1351                ),
1352                p("More text", 21..31)
1353            ]
1354        );
1355    }
1356
1357    #[gpui::test]
1358    async fn test_code_block() {
1359        let parsed = parse(
1360            "\
1361```
1362fn main() {
1363    return 0;
1364}
1365```
1366",
1367        )
1368        .await;
1369
1370        assert_eq!(
1371            parsed.children,
1372            vec![code_block(
1373                None,
1374                "fn main() {\n    return 0;\n}",
1375                0..35,
1376                None
1377            )]
1378        );
1379    }
1380
1381    #[gpui::test]
1382    async fn test_code_block_with_language(executor: BackgroundExecutor) {
1383        let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
1384        language_registry.add(rust_lang());
1385
1386        let parsed = parse_markdown(
1387            "\
1388```rust
1389fn main() {
1390    return 0;
1391}
1392```
1393",
1394            None,
1395            Some(language_registry),
1396        )
1397        .await;
1398
1399        assert_eq!(
1400            parsed.children,
1401            vec![code_block(
1402                Some("rust".to_string()),
1403                "fn main() {\n    return 0;\n}",
1404                0..39,
1405                Some(vec![])
1406            )]
1407        );
1408    }
1409
1410    fn rust_lang() -> Arc<Language> {
1411        Arc::new(Language::new(
1412            LanguageConfig {
1413                name: "Rust".into(),
1414                matcher: LanguageMatcher {
1415                    path_suffixes: vec!["rs".into()],
1416                    ..Default::default()
1417                },
1418                collapsed_placeholder: " /* ... */ ".to_string(),
1419                ..Default::default()
1420            },
1421            Some(tree_sitter_rust::LANGUAGE.into()),
1422        ))
1423    }
1424
1425    fn h1(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1426        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1427            source_range,
1428            level: HeadingLevel::H1,
1429            contents,
1430        })
1431    }
1432
1433    fn h2(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1434        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1435            source_range,
1436            level: HeadingLevel::H2,
1437            contents,
1438        })
1439    }
1440
1441    fn h3(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1442        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1443            source_range,
1444            level: HeadingLevel::H3,
1445            contents,
1446        })
1447    }
1448
1449    fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
1450        ParsedMarkdownElement::Paragraph(text(contents, source_range))
1451    }
1452
1453    fn text(contents: &str, source_range: Range<usize>) -> MarkdownParagraph {
1454        vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1455            highlights: Vec::new(),
1456            region_ranges: Vec::new(),
1457            regions: Vec::new(),
1458            source_range,
1459            contents: contents.to_string(),
1460        })]
1461    }
1462
1463    fn block_quote(
1464        children: Vec<ParsedMarkdownElement>,
1465        source_range: Range<usize>,
1466    ) -> ParsedMarkdownElement {
1467        ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
1468            source_range,
1469            children,
1470        })
1471    }
1472
1473    fn code_block(
1474        language: Option<String>,
1475        code: &str,
1476        source_range: Range<usize>,
1477        highlights: Option<Vec<(Range<usize>, HighlightId)>>,
1478    ) -> ParsedMarkdownElement {
1479        ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
1480            source_range,
1481            language,
1482            contents: code.to_string().into(),
1483            highlights,
1484        })
1485    }
1486
1487    fn list_item(
1488        source_range: Range<usize>,
1489        depth: u16,
1490        item_type: ParsedMarkdownListItemType,
1491        content: Vec<ParsedMarkdownElement>,
1492    ) -> ParsedMarkdownElement {
1493        ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
1494            source_range,
1495            item_type,
1496            depth,
1497            content,
1498        })
1499    }
1500
1501    fn table(
1502        source_range: Range<usize>,
1503        header: ParsedMarkdownTableRow,
1504        body: Vec<ParsedMarkdownTableRow>,
1505    ) -> ParsedMarkdownTable {
1506        ParsedMarkdownTable {
1507            column_alignments: Vec::new(),
1508            source_range,
1509            header,
1510            body,
1511        }
1512    }
1513
1514    fn row(children: Vec<MarkdownParagraph>) -> ParsedMarkdownTableRow {
1515        ParsedMarkdownTableRow { children }
1516    }
1517
1518    impl PartialEq for ParsedMarkdownTable {
1519        fn eq(&self, other: &Self) -> bool {
1520            self.source_range == other.source_range
1521                && self.header == other.header
1522                && self.body == other.body
1523        }
1524    }
1525
1526    impl PartialEq for ParsedMarkdownText {
1527        fn eq(&self, other: &Self) -> bool {
1528            self.source_range == other.source_range && self.contents == other.contents
1529        }
1530    }
1531}