markdown_parser.rs

   1use crate::markdown_elements::*;
   2use async_recursion::async_recursion;
   3use collections::FxHashMap;
   4use gpui::FontWeight;
   5use language::LanguageRegistry;
   6use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
   7use std::{ops::Range, path::PathBuf, sync::Arc, vec};
   8
   9pub async fn parse_markdown(
  10    markdown_input: &str,
  11    file_location_directory: Option<PathBuf>,
  12    language_registry: Option<Arc<LanguageRegistry>>,
  13) -> ParsedMarkdown {
  14    let mut options = Options::all();
  15    options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
  16
  17    let parser = Parser::new_ext(markdown_input, options);
  18    let parser = MarkdownParser::new(
  19        parser.into_offset_iter().collect(),
  20        file_location_directory,
  21        language_registry,
  22    );
  23    let renderer = parser.parse_document().await;
  24    ParsedMarkdown {
  25        children: renderer.parsed,
  26    }
  27}
  28
/// Cursor-based parser over a pre-collected stream of pulldown-cmark events.
struct MarkdownParser<'a> {
    /// Every event of the document, paired with its range in the source text
    tokens: Vec<(Event<'a>, Range<usize>)>,
    /// The current index in the tokens array
    cursor: usize,
    /// The blocks that we have successfully parsed so far
    parsed: Vec<ParsedMarkdownElement>,
    /// Handed to `Link::identify` / `Image::identify` when links and images are parsed
    file_location_directory: Option<PathBuf>,
    /// Used to look up a language for highlighting code blocks; no highlighting when `None`
    language_registry: Option<Arc<LanguageRegistry>>,
}
  38
/// Accumulator for a single list item while `parse_list` walks its events.
struct MarkdownListItem {
    /// Block elements collected for this item so far
    content: Vec<ParsedMarkdownElement>,
    /// Marker kind of the item (unordered, ordered or task)
    item_type: ParsedMarkdownListItemType,
}
  43
  44impl Default for MarkdownListItem {
  45    fn default() -> Self {
  46        Self {
  47            content: Vec::new(),
  48            item_type: ParsedMarkdownListItemType::Unordered,
  49        }
  50    }
  51}
  52
  53impl<'a> MarkdownParser<'a> {
  54    fn new(
  55        tokens: Vec<(Event<'a>, Range<usize>)>,
  56        file_location_directory: Option<PathBuf>,
  57        language_registry: Option<Arc<LanguageRegistry>>,
  58    ) -> Self {
  59        Self {
  60            tokens,
  61            file_location_directory,
  62            language_registry,
  63            cursor: 0,
  64            parsed: vec![],
  65        }
  66    }
  67
  68    fn eof(&self) -> bool {
  69        if self.tokens.is_empty() {
  70            return true;
  71        }
  72        self.cursor >= self.tokens.len() - 1
  73    }
  74
  75    fn peek(&self, steps: usize) -> Option<&(Event, Range<usize>)> {
  76        if self.eof() || (steps + self.cursor) >= self.tokens.len() {
  77            return self.tokens.last();
  78        }
  79        return self.tokens.get(self.cursor + steps);
  80    }
  81
  82    fn previous(&self) -> Option<&(Event, Range<usize>)> {
  83        if self.cursor == 0 || self.cursor > self.tokens.len() {
  84            return None;
  85        }
  86        return self.tokens.get(self.cursor - 1);
  87    }
  88
  89    fn current(&self) -> Option<&(Event, Range<usize>)> {
  90        return self.peek(0);
  91    }
  92
  93    fn current_event(&self) -> Option<&Event> {
  94        return self.current().map(|(event, _)| event);
  95    }
  96
  97    fn is_text_like(event: &Event) -> bool {
  98        match event {
  99            Event::Text(_)
 100            // Represent an inline code block
 101            | Event::Code(_)
 102            | Event::Html(_)
 103            | Event::FootnoteReference(_)
 104            | Event::Start(Tag::Link { .. })
 105            | Event::Start(Tag::Emphasis)
 106            | Event::Start(Tag::Strong)
 107            | Event::Start(Tag::Strikethrough)
 108            | Event::Start(Tag::Image { .. }) => {
 109                true
 110            }
 111            _ => false,
 112        }
 113    }
 114
 115    async fn parse_document(mut self) -> Self {
 116        while !self.eof() {
 117            if let Some(block) = self.parse_block().await {
 118                self.parsed.extend(block);
 119            } else {
 120                self.cursor += 1;
 121            }
 122        }
 123        self
 124    }
 125
 126    #[async_recursion]
 127    async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
 128        let (current, source_range) = self.current().unwrap();
 129        let source_range = source_range.clone();
 130        match current {
 131            Event::Start(tag) => match tag {
 132                Tag::Paragraph => {
 133                    self.cursor += 1;
 134                    let text = self.parse_text(false, Some(source_range));
 135                    Some(vec![ParsedMarkdownElement::Paragraph(text)])
 136                }
 137                Tag::Heading { level, .. } => {
 138                    let level = *level;
 139                    self.cursor += 1;
 140                    let heading = self.parse_heading(level);
 141                    Some(vec![ParsedMarkdownElement::Heading(heading)])
 142                }
 143                Tag::Table(alignment) => {
 144                    let alignment = alignment.clone();
 145                    self.cursor += 1;
 146                    let table = self.parse_table(alignment);
 147                    Some(vec![ParsedMarkdownElement::Table(table)])
 148                }
 149                Tag::List(order) => {
 150                    let order = *order;
 151                    self.cursor += 1;
 152                    let list = self.parse_list(order).await;
 153                    Some(list)
 154                }
 155                Tag::BlockQuote(_kind) => {
 156                    self.cursor += 1;
 157                    let block_quote = self.parse_block_quote().await;
 158                    Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
 159                }
 160                Tag::CodeBlock(kind) => {
 161                    let language = match kind {
 162                        pulldown_cmark::CodeBlockKind::Indented => None,
 163                        pulldown_cmark::CodeBlockKind::Fenced(language) => {
 164                            if language.is_empty() {
 165                                None
 166                            } else {
 167                                Some(language.to_string())
 168                            }
 169                        }
 170                    };
 171
 172                    self.cursor += 1;
 173
 174                    let code_block = self.parse_code_block(language).await;
 175                    Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
 176                }
 177                _ => None,
 178            },
 179            Event::Rule => {
 180                let source_range = source_range.clone();
 181                self.cursor += 1;
 182                Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
 183            }
 184            _ => None,
 185        }
 186    }
 187
 188    fn parse_text(
 189        &mut self,
 190        should_complete_on_soft_break: bool,
 191        source_range: Option<Range<usize>>,
 192    ) -> MarkdownParagraph {
 193        let source_range = source_range.unwrap_or_else(|| {
 194            self.current()
 195                .map(|(_, range)| range.clone())
 196                .unwrap_or_default()
 197        });
 198
 199        let mut markdown_text_like = Vec::new();
 200        let mut text = String::new();
 201        let mut bold_depth = 0;
 202        let mut italic_depth = 0;
 203        let mut strikethrough_depth = 0;
 204        let mut link: Option<Link> = None;
 205        let mut image: Option<Image> = None;
 206        let mut region_ranges: Vec<Range<usize>> = vec![];
 207        let mut regions: Vec<ParsedRegion> = vec![];
 208        let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
 209        let mut link_urls: Vec<String> = vec![];
 210        let mut link_ranges: Vec<Range<usize>> = vec![];
 211
 212        loop {
 213            if self.eof() {
 214                break;
 215            }
 216
 217            let (current, _) = self.current().unwrap();
 218            let prev_len = text.len();
 219            match current {
 220                Event::SoftBreak => {
 221                    if should_complete_on_soft_break {
 222                        break;
 223                    }
 224                    text.push(' ');
 225                }
 226
 227                Event::HardBreak => {
 228                    text.push('\n');
 229                }
 230
 231                // We want to ignore any inline HTML tags in the text but keep
 232                // the text between them
 233                Event::InlineHtml(_) => {}
 234
 235                Event::Text(t) => {
 236                    text.push_str(t.as_ref());
 237                    let mut style = MarkdownHighlightStyle::default();
 238
 239                    if bold_depth > 0 {
 240                        style.weight = FontWeight::BOLD;
 241                    }
 242
 243                    if italic_depth > 0 {
 244                        style.italic = true;
 245                    }
 246
 247                    if strikethrough_depth > 0 {
 248                        style.strikethrough = true;
 249                    }
 250
 251                    let last_run_len = if let Some(link) = link.clone() {
 252                        region_ranges.push(prev_len..text.len());
 253                        regions.push(ParsedRegion {
 254                            code: false,
 255                            link: Some(link),
 256                        });
 257                        style.underline = true;
 258                        prev_len
 259                    } else {
 260                        // Manually scan for links
 261                        let mut finder = linkify::LinkFinder::new();
 262                        finder.kinds(&[linkify::LinkKind::Url]);
 263                        let mut last_link_len = prev_len;
 264                        for link in finder.links(t) {
 265                            let start = link.start();
 266                            let end = link.end();
 267                            let range = (prev_len + start)..(prev_len + end);
 268                            link_ranges.push(range.clone());
 269                            link_urls.push(link.as_str().to_string());
 270
 271                            // If there is a style before we match a link, we have to add this to the highlighted ranges
 272                            if style != MarkdownHighlightStyle::default()
 273                                && last_link_len < link.start()
 274                            {
 275                                highlights.push((
 276                                    last_link_len..link.start(),
 277                                    MarkdownHighlight::Style(style.clone()),
 278                                ));
 279                            }
 280
 281                            highlights.push((
 282                                range.clone(),
 283                                MarkdownHighlight::Style(MarkdownHighlightStyle {
 284                                    underline: true,
 285                                    ..style
 286                                }),
 287                            ));
 288                            region_ranges.push(range.clone());
 289                            regions.push(ParsedRegion {
 290                                code: false,
 291                                link: Some(Link::Web {
 292                                    url: link.as_str().to_string(),
 293                                }),
 294                            });
 295                            last_link_len = end;
 296                        }
 297                        last_link_len
 298                    };
 299
 300                    if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
 301                        let mut new_highlight = true;
 302                        if let Some((last_range, last_style)) = highlights.last_mut() {
 303                            if last_range.end == last_run_len
 304                                && last_style == &MarkdownHighlight::Style(style.clone())
 305                            {
 306                                last_range.end = text.len();
 307                                new_highlight = false;
 308                            }
 309                        }
 310                        if new_highlight {
 311                            highlights.push((
 312                                last_run_len..text.len(),
 313                                MarkdownHighlight::Style(style.clone()),
 314                            ));
 315                        }
 316                    }
 317                }
 318                Event::Code(t) => {
 319                    text.push_str(t.as_ref());
 320                    region_ranges.push(prev_len..text.len());
 321
 322                    if link.is_some() {
 323                        highlights.push((
 324                            prev_len..text.len(),
 325                            MarkdownHighlight::Style(MarkdownHighlightStyle {
 326                                underline: true,
 327                                ..Default::default()
 328                            }),
 329                        ));
 330                    }
 331                    regions.push(ParsedRegion {
 332                        code: true,
 333                        link: link.clone(),
 334                    });
 335                }
 336                Event::Start(tag) => match tag {
 337                    Tag::Emphasis => italic_depth += 1,
 338                    Tag::Strong => bold_depth += 1,
 339                    Tag::Strikethrough => strikethrough_depth += 1,
 340                    Tag::Link { dest_url, .. } => {
 341                        link = Link::identify(
 342                            self.file_location_directory.clone(),
 343                            dest_url.to_string(),
 344                        );
 345                    }
 346                    Tag::Image { dest_url, .. } => {
 347                        if !text.is_empty() {
 348                            let parsed_regions = MarkdownParagraphChunk::Text(ParsedMarkdownText {
 349                                source_range: source_range.clone(),
 350                                contents: text.clone(),
 351                                highlights: highlights.clone(),
 352                                region_ranges: region_ranges.clone(),
 353                                regions: regions.clone(),
 354                            });
 355                            text = String::new();
 356                            highlights = vec![];
 357                            region_ranges = vec![];
 358                            regions = vec![];
 359                            markdown_text_like.push(parsed_regions);
 360                        }
 361                        image = Image::identify(
 362                            dest_url.to_string(),
 363                            source_range.clone(),
 364                            self.file_location_directory.clone(),
 365                        );
 366                    }
 367                    _ => {
 368                        break;
 369                    }
 370                },
 371
 372                Event::End(tag) => match tag {
 373                    TagEnd::Emphasis => italic_depth -= 1,
 374                    TagEnd::Strong => bold_depth -= 1,
 375                    TagEnd::Strikethrough => strikethrough_depth -= 1,
 376                    TagEnd::Link => {
 377                        link = None;
 378                    }
 379                    TagEnd::Image => {
 380                        if let Some(mut image) = image.take() {
 381                            if !text.is_empty() {
 382                                image.alt_text = Some(std::mem::take(&mut text).into());
 383                            }
 384                            markdown_text_like.push(MarkdownParagraphChunk::Image(image));
 385                        }
 386                    }
 387                    TagEnd::Paragraph => {
 388                        self.cursor += 1;
 389                        break;
 390                    }
 391                    _ => {
 392                        break;
 393                    }
 394                },
 395                _ => {
 396                    break;
 397                }
 398            }
 399
 400            self.cursor += 1;
 401        }
 402        if !text.is_empty() {
 403            markdown_text_like.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
 404                source_range: source_range.clone(),
 405                contents: text,
 406                highlights,
 407                regions,
 408                region_ranges,
 409            }));
 410        }
 411        markdown_text_like
 412    }
 413
 414    fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
 415        let (_event, source_range) = self.previous().unwrap();
 416        let source_range = source_range.clone();
 417        let text = self.parse_text(true, None);
 418
 419        // Advance past the heading end tag
 420        self.cursor += 1;
 421
 422        ParsedMarkdownHeading {
 423            source_range: source_range.clone(),
 424            level: match level {
 425                pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
 426                pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
 427                pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
 428                pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
 429                pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
 430                pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
 431            },
 432            contents: text,
 433        }
 434    }
 435
 436    fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
 437        let (_event, source_range) = self.previous().unwrap();
 438        let source_range = source_range.clone();
 439        let mut header = ParsedMarkdownTableRow::new();
 440        let mut body = vec![];
 441        let mut current_row = vec![];
 442        let mut in_header = true;
 443        let column_alignments = alignment.iter().map(Self::convert_alignment).collect();
 444
 445        loop {
 446            if self.eof() {
 447                break;
 448            }
 449
 450            let (current, source_range) = self.current().unwrap();
 451            let source_range = source_range.clone();
 452            match current {
 453                Event::Start(Tag::TableHead)
 454                | Event::Start(Tag::TableRow)
 455                | Event::End(TagEnd::TableCell) => {
 456                    self.cursor += 1;
 457                }
 458                Event::Start(Tag::TableCell) => {
 459                    self.cursor += 1;
 460                    let cell_contents = self.parse_text(false, Some(source_range));
 461                    current_row.push(cell_contents);
 462                }
 463                Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
 464                    self.cursor += 1;
 465                    let new_row = std::mem::take(&mut current_row);
 466                    if in_header {
 467                        header.children = new_row;
 468                        in_header = false;
 469                    } else {
 470                        let row = ParsedMarkdownTableRow::with_children(new_row);
 471                        body.push(row);
 472                    }
 473                }
 474                Event::End(TagEnd::Table) => {
 475                    self.cursor += 1;
 476                    break;
 477                }
 478                _ => {
 479                    break;
 480                }
 481            }
 482        }
 483
 484        ParsedMarkdownTable {
 485            source_range,
 486            header,
 487            body,
 488            column_alignments,
 489        }
 490    }
 491
 492    fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
 493        match alignment {
 494            Alignment::None => ParsedMarkdownTableAlignment::None,
 495            Alignment::Left => ParsedMarkdownTableAlignment::Left,
 496            Alignment::Center => ParsedMarkdownTableAlignment::Center,
 497            Alignment::Right => ParsedMarkdownTableAlignment::Right,
 498        }
 499    }
 500
    /// Parses a list whose start tag was just consumed, including any lists
    /// nested inside it.
    ///
    /// The result is flat: one `ListItem` element per item, each carrying its
    /// nesting `depth`, in document order. Blocks that follow a nested list
    /// inside an item are emitted as separate elements after the nested items.
    ///
    /// `order` is the starting number of an ordered list, or `None` for an
    /// unordered one.
    async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
        let (_, list_source_range) = self.previous().unwrap();

        let mut items = Vec::new();
        // One accumulator per list item that is currently open
        let mut items_stack = vec![MarkdownListItem::default()];
        let mut depth = 1;
        let mut order = order;
        // Numbering of the enclosing lists, restored when a nested list ends
        let mut order_stack = Vec::new();

        // depth -> index in `items` at which the item open at that depth has to
        // be inserted. A parent item is finished only after its nested items
        // have been pushed, so it is inserted in front of them.
        let mut insertion_indices = FxHashMap::default();
        // depth -> source range of the item open at that depth, cut off at the
        // start of its first nested list
        let mut source_ranges = FxHashMap::default();
        let mut start_item_range = list_source_range.clone();

        while !self.eof() {
            let (current, source_range) = self.current().unwrap();
            match current {
                Event::Start(Tag::List(new_order)) => {
                    if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
                        insertion_indices.insert(depth, items.len());
                    }

                    // We will use the start of the nested list as the end for the current item's range,
                    // because we don't care about the hierarchy of list items
                    if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
                        e.insert(start_item_range.start..source_range.start);
                    }

                    order_stack.push(order);
                    order = *new_order;
                    self.cursor += 1;
                    depth += 1;
                }
                Event::End(TagEnd::List(_)) => {
                    order = order_stack.pop().flatten();
                    self.cursor += 1;
                    depth -= 1;

                    // The list this function was called for has ended
                    if depth == 0 {
                        break;
                    }
                }
                Event::Start(Tag::Item) => {
                    start_item_range = source_range.clone();

                    self.cursor += 1;
                    items_stack.push(MarkdownListItem::default());

                    let mut task_list = None;
                    // Check for task list marker (`- [ ]` or `- [x]`)
                    if let Some(event) = self.current_event() {
                        // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
                        if event == &Event::Start(Tag::Paragraph) {
                            self.cursor += 1;
                        }

                        if let Some((Event::TaskListMarker(checked), range)) = self.current() {
                            task_list = Some((*checked, range.clone()));
                            self.cursor += 1;
                        }
                    }

                    if let Some((event, range)) = self.current() {
                        // This is a plain list item.
                        // For example `- some text` or `1. [Docs](./docs.md)`
                        if MarkdownParser::is_text_like(event) {
                            let text = self.parse_text(false, Some(range.clone()));
                            let block = ParsedMarkdownElement::Paragraph(text);
                            if let Some(content) = items_stack.last_mut() {
                                // A task marker takes precedence over list numbering
                                let item_type = if let Some((checked, range)) = task_list {
                                    ParsedMarkdownListItemType::Task(checked, range)
                                } else if let Some(order) = order {
                                    ParsedMarkdownListItemType::Ordered(order)
                                } else {
                                    ParsedMarkdownListItemType::Unordered
                                };
                                content.item_type = item_type;
                                content.content.push(block);
                            }
                        } else {
                            // The item starts with a block (code block, quote, ...);
                            // its item type stays at the default in this case
                            let block = self.parse_block().await;
                            if let Some(block) = block {
                                if let Some(list_item) = items_stack.last_mut() {
                                    list_item.content.extend(block);
                                }
                            }
                        }
                    }

                    // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
                    if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
                        self.cursor += 1;
                    }
                }
                Event::End(TagEnd::Item) => {
                    self.cursor += 1;

                    // The next item of an ordered list gets the next number
                    if let Some(current) = order {
                        order = Some(current + 1);
                    }

                    if let Some(list_item) = items_stack.pop() {
                        let source_range = source_ranges
                            .remove(&depth)
                            .unwrap_or(start_item_range.clone());

                        // We need to remove the last character of the source range, because it includes the newline character
                        let source_range = source_range.start..source_range.end - 1;
                        let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
                            source_range,
                            content: list_item.content,
                            depth,
                            item_type: list_item.item_type,
                        });

                        // An item that contained a nested list goes in front of
                        // the nested items that were already emitted
                        if let Some(index) = insertion_indices.get(&depth) {
                            items.insert(*index, item);
                            insertion_indices.remove(&depth);
                        } else {
                            items.push(item);
                        }
                    }
                }
                _ => {
                    if depth == 0 {
                        break;
                    }
                    // This can only happen if a list item starts with more than one paragraph,
                    // or the list item contains blocks that should be rendered after the nested list items
                    let block = self.parse_block().await;
                    if let Some(block) = block {
                        if let Some(list_item) = items_stack.last_mut() {
                            // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
                            if !insertion_indices.contains_key(&depth) {
                                list_item.content.extend(block);
                                continue;
                            }
                        }

                        // Otherwise we need to insert the block after all the nested items
                        // that have been parsed so far
                        items.extend(block);
                    } else {
                        // Not a block we can parse; skip the token
                        self.cursor += 1;
                    }
                }
            }
        }

        items
    }
 651
 652    #[async_recursion]
 653    async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
 654        let (_event, source_range) = self.previous().unwrap();
 655        let source_range = source_range.clone();
 656        let mut nested_depth = 1;
 657
 658        let mut children: Vec<ParsedMarkdownElement> = vec![];
 659
 660        while !self.eof() {
 661            let block = self.parse_block().await;
 662
 663            if let Some(block) = block {
 664                children.extend(block);
 665            } else {
 666                break;
 667            }
 668
 669            if self.eof() {
 670                break;
 671            }
 672
 673            let (current, _source_range) = self.current().unwrap();
 674            match current {
 675                // This is a nested block quote.
 676                // Record that we're in a nested block quote and continue parsing.
 677                // We don't need to advance the cursor since the next
 678                // call to `parse_block` will handle it.
 679                Event::Start(Tag::BlockQuote(_kind)) => {
 680                    nested_depth += 1;
 681                }
 682                Event::End(TagEnd::BlockQuote(_kind)) => {
 683                    nested_depth -= 1;
 684                    if nested_depth == 0 {
 685                        self.cursor += 1;
 686                        break;
 687                    }
 688                }
 689                _ => {}
 690            };
 691        }
 692
 693        ParsedMarkdownBlockQuote {
 694            source_range,
 695            children,
 696        }
 697    }
 698
 699    async fn parse_code_block(&mut self, language: Option<String>) -> ParsedMarkdownCodeBlock {
 700        let (_event, source_range) = self.previous().unwrap();
 701        let source_range = source_range.clone();
 702        let mut code = String::new();
 703
 704        while !self.eof() {
 705            let (current, _source_range) = self.current().unwrap();
 706            match current {
 707                Event::Text(text) => {
 708                    code.push_str(text);
 709                    self.cursor += 1;
 710                }
 711                Event::End(TagEnd::CodeBlock) => {
 712                    self.cursor += 1;
 713                    break;
 714                }
 715                _ => {
 716                    break;
 717                }
 718            }
 719        }
 720        let highlights = if let Some(language) = &language {
 721            if let Some(registry) = &self.language_registry {
 722                let rope: language::Rope = code.as_str().into();
 723                registry
 724                    .language_for_name_or_extension(language)
 725                    .await
 726                    .map(|l| l.highlight_text(&rope, 0..code.len()))
 727                    .ok()
 728            } else {
 729                None
 730            }
 731        } else {
 732            None
 733        };
 734
 735        ParsedMarkdownCodeBlock {
 736            source_range,
 737            contents: code.trim().to_string().into(),
 738            language,
 739            highlights,
 740        }
 741    }
 742}
 743
 744#[cfg(test)]
 745mod tests {
 746    use core::panic;
 747
 748    use super::*;
 749
 750    use gpui::BackgroundExecutor;
 751    use language::{
 752        tree_sitter_rust, HighlightId, Language, LanguageConfig, LanguageMatcher, LanguageRegistry,
 753    };
 754    use pretty_assertions::assert_eq;
 755    use ParsedMarkdownListItemType::*;
 756
 757    async fn parse(input: &str) -> ParsedMarkdown {
 758        parse_markdown(input, None, None).await
 759    }
 760
 761    #[gpui::test]
 762    async fn test_headings() {
 763        let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
 764
 765        assert_eq!(
 766            parsed.children,
 767            vec![
 768                h1(text("Heading one", 2..13), 0..14),
 769                h2(text("Heading two", 17..28), 14..29),
 770                h3(text("Heading three", 33..46), 29..46),
 771            ]
 772        );
 773    }
 774
 775    #[gpui::test]
 776    async fn test_newlines_dont_new_paragraphs() {
 777        let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
 778
 779        assert_eq!(
 780            parsed.children,
 781            vec![p("Some text that is bolded and italicized", 0..46)]
 782        );
 783    }
 784
 785    #[gpui::test]
 786    async fn test_heading_with_paragraph() {
 787        let parsed = parse("# Zed\nThe editor").await;
 788
 789        assert_eq!(
 790            parsed.children,
 791            vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
 792        );
 793    }
 794
 795    #[gpui::test]
 796    async fn test_double_newlines_do_new_paragraphs() {
 797        let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
 798
 799        assert_eq!(
 800            parsed.children,
 801            vec![
 802                p("Some text that is bolded", 0..29),
 803                p("and italicized", 31..47),
 804            ]
 805        );
 806    }
 807
 808    #[gpui::test]
 809    async fn test_bold_italic_text() {
 810        let parsed = parse("Some text **that is bolded** and *italicized*").await;
 811
 812        assert_eq!(
 813            parsed.children,
 814            vec![p("Some text that is bolded and italicized", 0..45)]
 815        );
 816    }
 817
 818    #[gpui::test]
 819    async fn test_nested_bold_strikethrough_text() {
 820        let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
 821
 822        assert_eq!(parsed.children.len(), 1);
 823        assert_eq!(
 824            parsed.children[0],
 825            ParsedMarkdownElement::Paragraph(vec![MarkdownParagraphChunk::Text(
 826                ParsedMarkdownText {
 827                    source_range: 0..35,
 828                    contents: "Some bostrikethroughld text".to_string(),
 829                    highlights: Vec::new(),
 830                    region_ranges: Vec::new(),
 831                    regions: Vec::new(),
 832                }
 833            )])
 834        );
 835
 836        let new_text = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 837            text
 838        } else {
 839            panic!("Expected a paragraph");
 840        };
 841
 842        let paragraph = if let MarkdownParagraphChunk::Text(text) = &new_text[0] {
 843            text
 844        } else {
 845            panic!("Expected a text");
 846        };
 847
 848        assert_eq!(
 849            paragraph.highlights,
 850            vec![
 851                (
 852                    5..7,
 853                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 854                        weight: FontWeight::BOLD,
 855                        ..Default::default()
 856                    }),
 857                ),
 858                (
 859                    7..20,
 860                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 861                        weight: FontWeight::BOLD,
 862                        strikethrough: true,
 863                        ..Default::default()
 864                    }),
 865                ),
 866                (
 867                    20..22,
 868                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 869                        weight: FontWeight::BOLD,
 870                        ..Default::default()
 871                    }),
 872                ),
 873            ]
 874        );
 875    }
 876
 877    #[gpui::test]
 878    async fn test_text_with_inline_html() {
 879        let parsed = parse("This is a paragraph with an inline HTML <sometag>tag</sometag>.").await;
 880
 881        assert_eq!(
 882            parsed.children,
 883            vec![p("This is a paragraph with an inline HTML tag.", 0..63),],
 884        );
 885    }
 886
 887    #[gpui::test]
 888    async fn test_raw_links_detection() {
 889        let parsed = parse("Checkout this https://zed.dev link").await;
 890
 891        assert_eq!(
 892            parsed.children,
 893            vec![p("Checkout this https://zed.dev link", 0..34)]
 894        );
 895    }
 896
 897    #[gpui::test]
 898    async fn test_empty_image() {
 899        let parsed = parse("![]()").await;
 900
 901        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 902            text
 903        } else {
 904            panic!("Expected a paragraph");
 905        };
 906        assert_eq!(paragraph.len(), 0);
 907    }
 908
 909    #[gpui::test]
 910    async fn test_image_links_detection() {
 911        let parsed = parse("![test](https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png)").await;
 912
 913        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 914            text
 915        } else {
 916            panic!("Expected a paragraph");
 917        };
 918        assert_eq!(
 919            paragraph[0],
 920            MarkdownParagraphChunk::Image(Image {
 921                source_range: 0..111,
 922                link: Link::Web {
 923                    url: "https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png".to_string(),
 924                },
 925                alt_text: Some("test".into()),
 926            },)
 927        );
 928    }
 929
 930    #[gpui::test]
 931    async fn test_image_without_alt_text() {
 932        let parsed = parse("![](http://example.com/foo.png)").await;
 933
 934        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 935            text
 936        } else {
 937            panic!("Expected a paragraph");
 938        };
 939        assert_eq!(
 940            paragraph[0],
 941            MarkdownParagraphChunk::Image(Image {
 942                source_range: 0..31,
 943                link: Link::Web {
 944                    url: "http://example.com/foo.png".to_string(),
 945                },
 946                alt_text: None,
 947            },)
 948        );
 949    }
 950
 951    #[gpui::test]
 952    async fn test_image_with_alt_text_containing_formatting() {
 953        let parsed = parse("![foo *bar* baz](http://example.com/foo.png)").await;
 954
 955        let ParsedMarkdownElement::Paragraph(chunks) = &parsed.children[0] else {
 956            panic!("Expected a paragraph");
 957        };
 958        assert_eq!(
 959            chunks,
 960            &[MarkdownParagraphChunk::Image(Image {
 961                source_range: 0..44,
 962                link: Link::Web {
 963                    url: "http://example.com/foo.png".to_string(),
 964                },
 965                alt_text: Some("foo bar baz".into()),
 966            }),],
 967        );
 968    }
 969
 970    #[gpui::test]
 971    async fn test_images_with_text_in_between() {
 972        let parsed = parse(
 973            "![foo](http://example.com/foo.png)\nLorem Ipsum\n![bar](http://example.com/bar.png)",
 974        )
 975        .await;
 976
 977        let chunks = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 978            text
 979        } else {
 980            panic!("Expected a paragraph");
 981        };
 982        assert_eq!(
 983            chunks,
 984            &vec![
 985                MarkdownParagraphChunk::Image(Image {
 986                    source_range: 0..81,
 987                    link: Link::Web {
 988                        url: "http://example.com/foo.png".to_string(),
 989                    },
 990                    alt_text: Some("foo".into()),
 991                }),
 992                MarkdownParagraphChunk::Text(ParsedMarkdownText {
 993                    source_range: 0..81,
 994                    contents: " Lorem Ipsum ".to_string(),
 995                    highlights: Vec::new(),
 996                    region_ranges: Vec::new(),
 997                    regions: Vec::new(),
 998                }),
 999                MarkdownParagraphChunk::Image(Image {
1000                    source_range: 0..81,
1001                    link: Link::Web {
1002                        url: "http://example.com/bar.png".to_string(),
1003                    },
1004                    alt_text: Some("bar".into()),
1005                })
1006            ]
1007        );
1008    }
1009
1010    #[gpui::test]
1011    async fn test_header_only_table() {
1012        let markdown = "\
1013| Header 1 | Header 2 |
1014|----------|----------|
1015
1016Some other content
1017";
1018
1019        let expected_table = table(
1020            0..48,
1021            row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1022            vec![],
1023        );
1024
1025        assert_eq!(
1026            parse(markdown).await.children[0],
1027            ParsedMarkdownElement::Table(expected_table)
1028        );
1029    }
1030
1031    #[gpui::test]
1032    async fn test_basic_table() {
1033        let markdown = "\
1034| Header 1 | Header 2 |
1035|----------|----------|
1036| Cell 1   | Cell 2   |
1037| Cell 3   | Cell 4   |";
1038
1039        let expected_table = table(
1040            0..95,
1041            row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1042            vec![
1043                row(vec![text("Cell 1", 49..59), text("Cell 2", 60..70)]),
1044                row(vec![text("Cell 3", 73..83), text("Cell 4", 84..94)]),
1045            ],
1046        );
1047
1048        assert_eq!(
1049            parse(markdown).await.children[0],
1050            ParsedMarkdownElement::Table(expected_table)
1051        );
1052    }
1053
1054    #[gpui::test]
1055    async fn test_list_basic() {
1056        let parsed = parse(
1057            "\
1058* Item 1
1059* Item 2
1060* Item 3
1061",
1062        )
1063        .await;
1064
1065        assert_eq!(
1066            parsed.children,
1067            vec![
1068                list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1069                list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1070                list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
1071            ],
1072        );
1073    }
1074
1075    #[gpui::test]
1076    async fn test_list_with_tasks() {
1077        let parsed = parse(
1078            "\
1079- [ ] TODO
1080- [x] Checked
1081",
1082        )
1083        .await;
1084
1085        assert_eq!(
1086            parsed.children,
1087            vec![
1088                list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1089                list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
1090            ],
1091        );
1092    }
1093
1094    #[gpui::test]
1095    async fn test_list_with_indented_task() {
1096        let parsed = parse(
1097            "\
1098- [ ] TODO
1099  - [x] Checked
1100  - Unordered
1101  1. Number 1
1102  1. Number 2
11031. Number A
1104",
1105        )
1106        .await;
1107
1108        assert_eq!(
1109            parsed.children,
1110            vec![
1111                list_item(0..12, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1112                list_item(13..26, 2, Task(true, 15..18), vec![p("Checked", 19..26)]),
1113                list_item(29..40, 2, Unordered, vec![p("Unordered", 31..40)]),
1114                list_item(43..54, 2, Ordered(1), vec![p("Number 1", 46..54)]),
1115                list_item(57..68, 2, Ordered(2), vec![p("Number 2", 60..68)]),
1116                list_item(69..80, 1, Ordered(1), vec![p("Number A", 72..80)]),
1117            ],
1118        );
1119    }
1120
1121    #[gpui::test]
1122    async fn test_list_with_linebreak_is_handled_correctly() {
1123        let parsed = parse(
1124            "\
1125- [ ] Task 1
1126
1127- [x] Task 2
1128",
1129        )
1130        .await;
1131
1132        assert_eq!(
1133            parsed.children,
1134            vec![
1135                list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
1136                list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
1137            ],
1138        );
1139    }
1140
1141    #[gpui::test]
1142    async fn test_list_nested() {
1143        let parsed = parse(
1144            "\
1145* Item 1
1146* Item 2
1147* Item 3
1148
11491. Hello
11501. Two
1151   1. Three
11522. Four
11533. Five
1154
1155* First
1156  1. Hello
1157     1. Goodbyte
1158        - Inner
1159        - Inner
1160  2. Goodbyte
1161        - Next item empty
1162        -
1163* Last
1164",
1165        )
1166        .await;
1167
1168        assert_eq!(
1169            parsed.children,
1170            vec![
1171                list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1172                list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1173                list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
1174                list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
1175                list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
1176                list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
1177                list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
1178                list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
1179                list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
1180                list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
1181                list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
1182                list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
1183                list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
1184                list_item(143..159, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
1185                list_item(160..180, 3, Unordered, vec![p("Next item empty", 165..180)]),
1186                list_item(186..190, 3, Unordered, vec![]),
1187                list_item(191..197, 1, Unordered, vec![p("Last", 193..197)]),
1188            ]
1189        );
1190    }
1191
1192    #[gpui::test]
1193    async fn test_list_with_nested_content() {
1194        let parsed = parse(
1195            "\
1196*   This is a list item with two paragraphs.
1197
1198    This is the second paragraph in the list item.
1199",
1200        )
1201        .await;
1202
1203        assert_eq!(
1204            parsed.children,
1205            vec![list_item(
1206                0..96,
1207                1,
1208                Unordered,
1209                vec![
1210                    p("This is a list item with two paragraphs.", 4..44),
1211                    p("This is the second paragraph in the list item.", 50..97)
1212                ],
1213            ),],
1214        );
1215    }
1216
1217    #[gpui::test]
1218    async fn test_list_item_with_inline_html() {
1219        let parsed = parse(
1220            "\
1221*   This is a list item with an inline HTML <sometag>tag</sometag>.
1222",
1223        )
1224        .await;
1225
1226        assert_eq!(
1227            parsed.children,
1228            vec![list_item(
1229                0..67,
1230                1,
1231                Unordered,
1232                vec![p("This is a list item with an inline HTML tag.", 4..44),],
1233            ),],
1234        );
1235    }
1236
1237    #[gpui::test]
1238    async fn test_nested_list_with_paragraph_inside() {
1239        let parsed = parse(
1240            "\
12411. a
1242    1. b
1243        1. c
1244
1245    text
1246
1247    1. d
1248",
1249        )
1250        .await;
1251
1252        assert_eq!(
1253            parsed.children,
1254            vec![
1255                list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
1256                list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
1257                list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
1258                p("text", 32..37),
1259                list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
1260            ],
1261        );
1262    }
1263
1264    #[gpui::test]
1265    async fn test_list_with_leading_text() {
1266        let parsed = parse(
1267            "\
1268* `code`
1269* **bold**
1270* [link](https://example.com)
1271",
1272        )
1273        .await;
1274
1275        assert_eq!(
1276            parsed.children,
1277            vec![
1278                list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
1279                list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
1280                list_item(20..49, 1, Unordered, vec![p("link", 22..49)],),
1281            ],
1282        );
1283    }
1284
1285    #[gpui::test]
1286    async fn test_simple_block_quote() {
1287        let parsed = parse("> Simple block quote with **styled text**").await;
1288
1289        assert_eq!(
1290            parsed.children,
1291            vec![block_quote(
1292                vec![p("Simple block quote with styled text", 2..41)],
1293                0..41
1294            )]
1295        );
1296    }
1297
1298    #[gpui::test]
1299    async fn test_simple_block_quote_with_multiple_lines() {
1300        let parsed = parse(
1301            "\
1302> # Heading
1303> More
1304> text
1305>
1306> More text
1307",
1308        )
1309        .await;
1310
1311        assert_eq!(
1312            parsed.children,
1313            vec![block_quote(
1314                vec![
1315                    h1(text("Heading", 4..11), 2..12),
1316                    p("More text", 14..26),
1317                    p("More text", 30..40)
1318                ],
1319                0..40
1320            )]
1321        );
1322    }
1323
1324    #[gpui::test]
1325    async fn test_nested_block_quote() {
1326        let parsed = parse(
1327            "\
1328> A
1329>
1330> > # B
1331>
1332> C
1333
1334More text
1335",
1336        )
1337        .await;
1338
1339        assert_eq!(
1340            parsed.children,
1341            vec![
1342                block_quote(
1343                    vec![
1344                        p("A", 2..4),
1345                        block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
1346                        p("C", 18..20)
1347                    ],
1348                    0..20
1349                ),
1350                p("More text", 21..31)
1351            ]
1352        );
1353    }
1354
1355    #[gpui::test]
1356    async fn test_code_block() {
1357        let parsed = parse(
1358            "\
1359```
1360fn main() {
1361    return 0;
1362}
1363```
1364",
1365        )
1366        .await;
1367
1368        assert_eq!(
1369            parsed.children,
1370            vec![code_block(
1371                None,
1372                "fn main() {\n    return 0;\n}",
1373                0..35,
1374                None
1375            )]
1376        );
1377    }
1378
1379    #[gpui::test]
1380    async fn test_code_block_with_language(executor: BackgroundExecutor) {
1381        let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
1382        language_registry.add(rust_lang());
1383
1384        let parsed = parse_markdown(
1385            "\
1386```rust
1387fn main() {
1388    return 0;
1389}
1390```
1391",
1392            None,
1393            Some(language_registry),
1394        )
1395        .await;
1396
1397        assert_eq!(
1398            parsed.children,
1399            vec![code_block(
1400                Some("rust".to_string()),
1401                "fn main() {\n    return 0;\n}",
1402                0..39,
1403                Some(vec![])
1404            )]
1405        );
1406    }
1407
1408    fn rust_lang() -> Arc<Language> {
1409        Arc::new(Language::new(
1410            LanguageConfig {
1411                name: "Rust".into(),
1412                matcher: LanguageMatcher {
1413                    path_suffixes: vec!["rs".into()],
1414                    ..Default::default()
1415                },
1416                collapsed_placeholder: " /* ... */ ".to_string(),
1417                ..Default::default()
1418            },
1419            Some(tree_sitter_rust::LANGUAGE.into()),
1420        ))
1421    }
1422
1423    fn h1(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1424        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1425            source_range,
1426            level: HeadingLevel::H1,
1427            contents,
1428        })
1429    }
1430
1431    fn h2(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1432        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1433            source_range,
1434            level: HeadingLevel::H2,
1435            contents,
1436        })
1437    }
1438
1439    fn h3(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1440        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1441            source_range,
1442            level: HeadingLevel::H3,
1443            contents,
1444        })
1445    }
1446
1447    fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
1448        ParsedMarkdownElement::Paragraph(text(contents, source_range))
1449    }
1450
1451    fn text(contents: &str, source_range: Range<usize>) -> MarkdownParagraph {
1452        vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1453            highlights: Vec::new(),
1454            region_ranges: Vec::new(),
1455            regions: Vec::new(),
1456            source_range,
1457            contents: contents.to_string(),
1458        })]
1459    }
1460
1461    fn block_quote(
1462        children: Vec<ParsedMarkdownElement>,
1463        source_range: Range<usize>,
1464    ) -> ParsedMarkdownElement {
1465        ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
1466            source_range,
1467            children,
1468        })
1469    }
1470
1471    fn code_block(
1472        language: Option<String>,
1473        code: &str,
1474        source_range: Range<usize>,
1475        highlights: Option<Vec<(Range<usize>, HighlightId)>>,
1476    ) -> ParsedMarkdownElement {
1477        ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
1478            source_range,
1479            language,
1480            contents: code.to_string().into(),
1481            highlights,
1482        })
1483    }
1484
1485    fn list_item(
1486        source_range: Range<usize>,
1487        depth: u16,
1488        item_type: ParsedMarkdownListItemType,
1489        content: Vec<ParsedMarkdownElement>,
1490    ) -> ParsedMarkdownElement {
1491        ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
1492            source_range,
1493            item_type,
1494            depth,
1495            content,
1496        })
1497    }
1498
1499    fn table(
1500        source_range: Range<usize>,
1501        header: ParsedMarkdownTableRow,
1502        body: Vec<ParsedMarkdownTableRow>,
1503    ) -> ParsedMarkdownTable {
1504        ParsedMarkdownTable {
1505            column_alignments: Vec::new(),
1506            source_range,
1507            header,
1508            body,
1509        }
1510    }
1511
1512    fn row(children: Vec<MarkdownParagraph>) -> ParsedMarkdownTableRow {
1513        ParsedMarkdownTableRow { children }
1514    }
1515
1516    impl PartialEq for ParsedMarkdownTable {
1517        fn eq(&self, other: &Self) -> bool {
1518            self.source_range == other.source_range
1519                && self.header == other.header
1520                && self.body == other.body
1521        }
1522    }
1523
1524    impl PartialEq for ParsedMarkdownText {
1525        fn eq(&self, other: &Self) -> bool {
1526            self.source_range == other.source_range && self.contents == other.contents
1527        }
1528    }
1529}