markdown_parser.rs

   1use crate::markdown_elements::*;
   2use async_recursion::async_recursion;
   3use collections::FxHashMap;
   4use gpui::FontWeight;
   5use language::LanguageRegistry;
   6use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
   7use std::{ops::Range, path::PathBuf, sync::Arc, vec};
   8
   9pub async fn parse_markdown(
  10    markdown_input: &str,
  11    file_location_directory: Option<PathBuf>,
  12    language_registry: Option<Arc<LanguageRegistry>>,
  13) -> ParsedMarkdown {
  14    let mut options = Options::all();
  15    options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
  16
  17    let parser = Parser::new_ext(markdown_input, options);
  18    let parser = MarkdownParser::new(
  19        parser.into_offset_iter().collect(),
  20        file_location_directory,
  21        language_registry,
  22    );
  23    let renderer = parser.parse_document().await;
  24    ParsedMarkdown {
  25        children: renderer.parsed,
  26    }
  27}
  28
  29struct MarkdownParser<'a> {
  30    tokens: Vec<(Event<'a>, Range<usize>)>,
  31    /// The current index in the tokens array
  32    cursor: usize,
  33    /// The blocks that we have successfully parsed so far
  34    parsed: Vec<ParsedMarkdownElement>,
  35    file_location_directory: Option<PathBuf>,
  36    language_registry: Option<Arc<LanguageRegistry>>,
  37}
  38
  39struct MarkdownListItem {
  40    content: Vec<ParsedMarkdownElement>,
  41    item_type: ParsedMarkdownListItemType,
  42}
  43
  44impl Default for MarkdownListItem {
  45    fn default() -> Self {
  46        Self {
  47            content: Vec::new(),
  48            item_type: ParsedMarkdownListItemType::Unordered,
  49        }
  50    }
  51}
  52
  53impl<'a> MarkdownParser<'a> {
  54    fn new(
  55        tokens: Vec<(Event<'a>, Range<usize>)>,
  56        file_location_directory: Option<PathBuf>,
  57        language_registry: Option<Arc<LanguageRegistry>>,
  58    ) -> Self {
  59        Self {
  60            tokens,
  61            file_location_directory,
  62            language_registry,
  63            cursor: 0,
  64            parsed: vec![],
  65        }
  66    }
  67
  68    fn eof(&self) -> bool {
  69        if self.tokens.is_empty() {
  70            return true;
  71        }
  72        self.cursor >= self.tokens.len() - 1
  73    }
  74
  75    fn peek(&self, steps: usize) -> Option<&(Event<'_>, Range<usize>)> {
  76        if self.eof() || (steps + self.cursor) >= self.tokens.len() {
  77            return self.tokens.last();
  78        }
  79        return self.tokens.get(self.cursor + steps);
  80    }
  81
  82    fn previous(&self) -> Option<&(Event<'_>, Range<usize>)> {
  83        if self.cursor == 0 || self.cursor > self.tokens.len() {
  84            return None;
  85        }
  86        return self.tokens.get(self.cursor - 1);
  87    }
  88
  89    fn current(&self) -> Option<&(Event<'_>, Range<usize>)> {
  90        return self.peek(0);
  91    }
  92
  93    fn current_event(&self) -> Option<&Event<'_>> {
  94        return self.current().map(|(event, _)| event);
  95    }
  96
  97    fn is_text_like(event: &Event) -> bool {
  98        match event {
  99            Event::Text(_)
 100            // Represent an inline code block
 101            | Event::Code(_)
 102            | Event::Html(_)
 103            | Event::InlineHtml(_)
 104            | Event::FootnoteReference(_)
 105            | Event::Start(Tag::Link { .. })
 106            | Event::Start(Tag::Emphasis)
 107            | Event::Start(Tag::Strong)
 108            | Event::Start(Tag::Strikethrough)
 109            | Event::Start(Tag::Image { .. }) => {
 110                true
 111            }
 112            _ => false,
 113        }
 114    }
 115
 116    async fn parse_document(mut self) -> Self {
 117        while !self.eof() {
 118            if let Some(block) = self.parse_block().await {
 119                self.parsed.extend(block);
 120            } else {
 121                self.cursor += 1;
 122            }
 123        }
 124        self
 125    }
 126
 127    #[async_recursion]
 128    async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
 129        let (current, source_range) = self.current().unwrap();
 130        let source_range = source_range.clone();
 131        match current {
 132            Event::Start(tag) => match tag {
 133                Tag::Paragraph => {
 134                    self.cursor += 1;
 135                    let text = self.parse_text(false, Some(source_range));
 136                    Some(vec![ParsedMarkdownElement::Paragraph(text)])
 137                }
 138                Tag::Heading { level, .. } => {
 139                    let level = *level;
 140                    self.cursor += 1;
 141                    let heading = self.parse_heading(level);
 142                    Some(vec![ParsedMarkdownElement::Heading(heading)])
 143                }
 144                Tag::Table(alignment) => {
 145                    let alignment = alignment.clone();
 146                    self.cursor += 1;
 147                    let table = self.parse_table(alignment);
 148                    Some(vec![ParsedMarkdownElement::Table(table)])
 149                }
 150                Tag::List(order) => {
 151                    let order = *order;
 152                    self.cursor += 1;
 153                    let list = self.parse_list(order).await;
 154                    Some(list)
 155                }
 156                Tag::BlockQuote(_kind) => {
 157                    self.cursor += 1;
 158                    let block_quote = self.parse_block_quote().await;
 159                    Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
 160                }
 161                Tag::CodeBlock(kind) => {
 162                    let language = match kind {
 163                        pulldown_cmark::CodeBlockKind::Indented => None,
 164                        pulldown_cmark::CodeBlockKind::Fenced(language) => {
 165                            if language.is_empty() {
 166                                None
 167                            } else {
 168                                Some(language.to_string())
 169                            }
 170                        }
 171                    };
 172
 173                    self.cursor += 1;
 174
 175                    let code_block = self.parse_code_block(language).await;
 176                    Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
 177                }
 178                _ => None,
 179            },
 180            Event::Rule => {
 181                let source_range = source_range.clone();
 182                self.cursor += 1;
 183                Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
 184            }
 185            _ => None,
 186        }
 187    }
 188
 189    fn parse_text(
 190        &mut self,
 191        should_complete_on_soft_break: bool,
 192        source_range: Option<Range<usize>>,
 193    ) -> MarkdownParagraph {
 194        let source_range = source_range.unwrap_or_else(|| {
 195            self.current()
 196                .map(|(_, range)| range.clone())
 197                .unwrap_or_default()
 198        });
 199
 200        let mut markdown_text_like = Vec::new();
 201        let mut text = String::new();
 202        let mut bold_depth = 0;
 203        let mut italic_depth = 0;
 204        let mut strikethrough_depth = 0;
 205        let mut link: Option<Link> = None;
 206        let mut image: Option<Image> = None;
 207        let mut region_ranges: Vec<Range<usize>> = vec![];
 208        let mut regions: Vec<ParsedRegion> = vec![];
 209        let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
 210        let mut link_urls: Vec<String> = vec![];
 211        let mut link_ranges: Vec<Range<usize>> = vec![];
 212
 213        loop {
 214            if self.eof() {
 215                break;
 216            }
 217
 218            let (current, _) = self.current().unwrap();
 219            let prev_len = text.len();
 220            match current {
 221                Event::SoftBreak => {
 222                    if should_complete_on_soft_break {
 223                        break;
 224                    }
 225                    text.push(' ');
 226                }
 227
 228                Event::HardBreak => {
 229                    text.push('\n');
 230                }
 231
 232                // We want to ignore any inline HTML tags in the text but keep
 233                // the text between them
 234                Event::InlineHtml(_) => {}
 235
 236                Event::Text(t) => {
 237                    text.push_str(t.as_ref());
 238                    let mut style = MarkdownHighlightStyle::default();
 239
 240                    if bold_depth > 0 {
 241                        style.weight = FontWeight::BOLD;
 242                    }
 243
 244                    if italic_depth > 0 {
 245                        style.italic = true;
 246                    }
 247
 248                    if strikethrough_depth > 0 {
 249                        style.strikethrough = true;
 250                    }
 251
 252                    let last_run_len = if let Some(link) = link.clone() {
 253                        region_ranges.push(prev_len..text.len());
 254                        regions.push(ParsedRegion {
 255                            code: false,
 256                            link: Some(link),
 257                        });
 258                        style.underline = true;
 259                        prev_len
 260                    } else {
 261                        // Manually scan for links
 262                        let mut finder = linkify::LinkFinder::new();
 263                        finder.kinds(&[linkify::LinkKind::Url]);
 264                        let mut last_link_len = prev_len;
 265                        for link in finder.links(t) {
 266                            let start = link.start();
 267                            let end = link.end();
 268                            let range = (prev_len + start)..(prev_len + end);
 269                            link_ranges.push(range.clone());
 270                            link_urls.push(link.as_str().to_string());
 271
 272                            // If there is a style before we match a link, we have to add this to the highlighted ranges
 273                            if style != MarkdownHighlightStyle::default()
 274                                && last_link_len < link.start()
 275                            {
 276                                highlights.push((
 277                                    last_link_len..link.start(),
 278                                    MarkdownHighlight::Style(style.clone()),
 279                                ));
 280                            }
 281
 282                            highlights.push((
 283                                range.clone(),
 284                                MarkdownHighlight::Style(MarkdownHighlightStyle {
 285                                    underline: true,
 286                                    ..style
 287                                }),
 288                            ));
 289                            region_ranges.push(range.clone());
 290                            regions.push(ParsedRegion {
 291                                code: false,
 292                                link: Some(Link::Web {
 293                                    url: link.as_str().to_string(),
 294                                }),
 295                            });
 296                            last_link_len = end;
 297                        }
 298                        last_link_len
 299                    };
 300
 301                    if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
 302                        let mut new_highlight = true;
 303                        if let Some((last_range, last_style)) = highlights.last_mut()
 304                            && last_range.end == last_run_len
 305                            && last_style == &MarkdownHighlight::Style(style.clone())
 306                        {
 307                            last_range.end = text.len();
 308                            new_highlight = false;
 309                        }
 310                        if new_highlight {
 311                            highlights.push((
 312                                last_run_len..text.len(),
 313                                MarkdownHighlight::Style(style.clone()),
 314                            ));
 315                        }
 316                    }
 317                }
 318                Event::Code(t) => {
 319                    text.push_str(t.as_ref());
 320                    region_ranges.push(prev_len..text.len());
 321
 322                    if link.is_some() {
 323                        highlights.push((
 324                            prev_len..text.len(),
 325                            MarkdownHighlight::Style(MarkdownHighlightStyle {
 326                                underline: true,
 327                                ..Default::default()
 328                            }),
 329                        ));
 330                    }
 331                    regions.push(ParsedRegion {
 332                        code: true,
 333                        link: link.clone(),
 334                    });
 335                }
 336                Event::Start(tag) => match tag {
 337                    Tag::Emphasis => italic_depth += 1,
 338                    Tag::Strong => bold_depth += 1,
 339                    Tag::Strikethrough => strikethrough_depth += 1,
 340                    Tag::Link { dest_url, .. } => {
 341                        link = Link::identify(
 342                            self.file_location_directory.clone(),
 343                            dest_url.to_string(),
 344                        );
 345                    }
 346                    Tag::Image { dest_url, .. } => {
 347                        if !text.is_empty() {
 348                            let parsed_regions = MarkdownParagraphChunk::Text(ParsedMarkdownText {
 349                                source_range: source_range.clone(),
 350                                contents: text.clone(),
 351                                highlights: highlights.clone(),
 352                                region_ranges: region_ranges.clone(),
 353                                regions: regions.clone(),
 354                            });
 355                            text = String::new();
 356                            highlights = vec![];
 357                            region_ranges = vec![];
 358                            regions = vec![];
 359                            markdown_text_like.push(parsed_regions);
 360                        }
 361                        image = Image::identify(
 362                            dest_url.to_string(),
 363                            source_range.clone(),
 364                            self.file_location_directory.clone(),
 365                        );
 366                    }
 367                    _ => {
 368                        break;
 369                    }
 370                },
 371
 372                Event::End(tag) => match tag {
 373                    TagEnd::Emphasis => italic_depth -= 1,
 374                    TagEnd::Strong => bold_depth -= 1,
 375                    TagEnd::Strikethrough => strikethrough_depth -= 1,
 376                    TagEnd::Link => {
 377                        link = None;
 378                    }
 379                    TagEnd::Image => {
 380                        if let Some(mut image) = image.take() {
 381                            if !text.is_empty() {
 382                                image.alt_text = Some(std::mem::take(&mut text).into());
 383                            }
 384                            markdown_text_like.push(MarkdownParagraphChunk::Image(image));
 385                        }
 386                    }
 387                    TagEnd::Paragraph => {
 388                        self.cursor += 1;
 389                        break;
 390                    }
 391                    _ => {
 392                        break;
 393                    }
 394                },
 395                _ => {
 396                    break;
 397                }
 398            }
 399
 400            self.cursor += 1;
 401        }
 402        if !text.is_empty() {
 403            markdown_text_like.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
 404                source_range: source_range.clone(),
 405                contents: text,
 406                highlights,
 407                regions,
 408                region_ranges,
 409            }));
 410        }
 411        markdown_text_like
 412    }
 413
 414    fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
 415        let (_event, source_range) = self.previous().unwrap();
 416        let source_range = source_range.clone();
 417        let text = self.parse_text(true, None);
 418
 419        // Advance past the heading end tag
 420        self.cursor += 1;
 421
 422        ParsedMarkdownHeading {
 423            source_range: source_range.clone(),
 424            level: match level {
 425                pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
 426                pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
 427                pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
 428                pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
 429                pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
 430                pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
 431            },
 432            contents: text,
 433        }
 434    }
 435
 436    fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
 437        let (_event, source_range) = self.previous().unwrap();
 438        let source_range = source_range.clone();
 439        let mut header = ParsedMarkdownTableRow::new();
 440        let mut body = vec![];
 441        let mut current_row = vec![];
 442        let mut in_header = true;
 443        let column_alignments = alignment.iter().map(Self::convert_alignment).collect();
 444
 445        loop {
 446            if self.eof() {
 447                break;
 448            }
 449
 450            let (current, source_range) = self.current().unwrap();
 451            let source_range = source_range.clone();
 452            match current {
 453                Event::Start(Tag::TableHead)
 454                | Event::Start(Tag::TableRow)
 455                | Event::End(TagEnd::TableCell) => {
 456                    self.cursor += 1;
 457                }
 458                Event::Start(Tag::TableCell) => {
 459                    self.cursor += 1;
 460                    let cell_contents = self.parse_text(false, Some(source_range));
 461                    current_row.push(cell_contents);
 462                }
 463                Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
 464                    self.cursor += 1;
 465                    let new_row = std::mem::take(&mut current_row);
 466                    if in_header {
 467                        header.children = new_row;
 468                        in_header = false;
 469                    } else {
 470                        let row = ParsedMarkdownTableRow::with_children(new_row);
 471                        body.push(row);
 472                    }
 473                }
 474                Event::End(TagEnd::Table) => {
 475                    self.cursor += 1;
 476                    break;
 477                }
 478                _ => {
 479                    break;
 480                }
 481            }
 482        }
 483
 484        ParsedMarkdownTable {
 485            source_range,
 486            header,
 487            body,
 488            column_alignments,
 489        }
 490    }
 491
 492    fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
 493        match alignment {
 494            Alignment::None => ParsedMarkdownTableAlignment::None,
 495            Alignment::Left => ParsedMarkdownTableAlignment::Left,
 496            Alignment::Center => ParsedMarkdownTableAlignment::Center,
 497            Alignment::Right => ParsedMarkdownTableAlignment::Right,
 498        }
 499    }
 500
 501    async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
 502        let (_, list_source_range) = self.previous().unwrap();
 503
 504        let mut items = Vec::new();
 505        let mut items_stack = vec![MarkdownListItem::default()];
 506        let mut depth = 1;
 507        let mut order = order;
 508        let mut order_stack = Vec::new();
 509
 510        let mut insertion_indices = FxHashMap::default();
 511        let mut source_ranges = FxHashMap::default();
 512        let mut start_item_range = list_source_range.clone();
 513
 514        while !self.eof() {
 515            let (current, source_range) = self.current().unwrap();
 516            match current {
 517                Event::Start(Tag::List(new_order)) => {
 518                    if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
 519                        insertion_indices.insert(depth, items.len());
 520                    }
 521
 522                    // We will use the start of the nested list as the end for the current item's range,
 523                    // because we don't care about the hierarchy of list items
 524                    if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
 525                        e.insert(start_item_range.start..source_range.start);
 526                    }
 527
 528                    order_stack.push(order);
 529                    order = *new_order;
 530                    self.cursor += 1;
 531                    depth += 1;
 532                }
 533                Event::End(TagEnd::List(_)) => {
 534                    order = order_stack.pop().flatten();
 535                    self.cursor += 1;
 536                    depth -= 1;
 537
 538                    if depth == 0 {
 539                        break;
 540                    }
 541                }
 542                Event::Start(Tag::Item) => {
 543                    start_item_range = source_range.clone();
 544
 545                    self.cursor += 1;
 546                    items_stack.push(MarkdownListItem::default());
 547
 548                    let mut task_list = None;
 549                    // Check for task list marker (`- [ ]` or `- [x]`)
 550                    if let Some(event) = self.current_event() {
 551                        // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
 552                        if event == &Event::Start(Tag::Paragraph) {
 553                            self.cursor += 1;
 554                        }
 555
 556                        if let Some((Event::TaskListMarker(checked), range)) = self.current() {
 557                            task_list = Some((*checked, range.clone()));
 558                            self.cursor += 1;
 559                        }
 560                    }
 561
 562                    if let Some((event, range)) = self.current() {
 563                        // This is a plain list item.
 564                        // For example `- some text` or `1. [Docs](./docs.md)`
 565                        if MarkdownParser::is_text_like(event) {
 566                            let text = self.parse_text(false, Some(range.clone()));
 567                            let block = ParsedMarkdownElement::Paragraph(text);
 568                            if let Some(content) = items_stack.last_mut() {
 569                                let item_type = if let Some((checked, range)) = task_list {
 570                                    ParsedMarkdownListItemType::Task(checked, range)
 571                                } else if let Some(order) = order {
 572                                    ParsedMarkdownListItemType::Ordered(order)
 573                                } else {
 574                                    ParsedMarkdownListItemType::Unordered
 575                                };
 576                                content.item_type = item_type;
 577                                content.content.push(block);
 578                            }
 579                        } else {
 580                            let block = self.parse_block().await;
 581                            if let Some(block) = block
 582                                && let Some(list_item) = items_stack.last_mut()
 583                            {
 584                                list_item.content.extend(block);
 585                            }
 586                        }
 587                    }
 588
 589                    // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
 590                    if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
 591                        self.cursor += 1;
 592                    }
 593                }
 594                Event::End(TagEnd::Item) => {
 595                    self.cursor += 1;
 596
 597                    if let Some(current) = order {
 598                        order = Some(current + 1);
 599                    }
 600
 601                    if let Some(list_item) = items_stack.pop() {
 602                        let source_range = source_ranges
 603                            .remove(&depth)
 604                            .unwrap_or(start_item_range.clone());
 605
 606                        // We need to remove the last character of the source range, because it includes the newline character
 607                        let source_range = source_range.start..source_range.end - 1;
 608                        let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
 609                            source_range,
 610                            content: list_item.content,
 611                            depth,
 612                            item_type: list_item.item_type,
 613                        });
 614
 615                        if let Some(index) = insertion_indices.get(&depth) {
 616                            items.insert(*index, item);
 617                            insertion_indices.remove(&depth);
 618                        } else {
 619                            items.push(item);
 620                        }
 621                    }
 622                }
 623                _ => {
 624                    if depth == 0 {
 625                        break;
 626                    }
 627                    // This can only happen if a list item starts with more then one paragraph,
 628                    // or the list item contains blocks that should be rendered after the nested list items
 629                    let block = self.parse_block().await;
 630                    if let Some(block) = block {
 631                        if let Some(list_item) = items_stack.last_mut() {
 632                            // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
 633                            if !insertion_indices.contains_key(&depth) {
 634                                list_item.content.extend(block);
 635                                continue;
 636                            }
 637                        }
 638
 639                        // Otherwise we need to insert the block after all the nested items
 640                        // that have been parsed so far
 641                        items.extend(block);
 642                    } else {
 643                        self.cursor += 1;
 644                    }
 645                }
 646            }
 647        }
 648
 649        items
 650    }
 651
 652    #[async_recursion]
 653    async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
 654        let (_event, source_range) = self.previous().unwrap();
 655        let source_range = source_range.clone();
 656        let mut nested_depth = 1;
 657
 658        let mut children: Vec<ParsedMarkdownElement> = vec![];
 659
 660        while !self.eof() {
 661            let block = self.parse_block().await;
 662
 663            if let Some(block) = block {
 664                children.extend(block);
 665            } else {
 666                break;
 667            }
 668
 669            if self.eof() {
 670                break;
 671            }
 672
 673            let (current, _source_range) = self.current().unwrap();
 674            match current {
 675                // This is a nested block quote.
 676                // Record that we're in a nested block quote and continue parsing.
 677                // We don't need to advance the cursor since the next
 678                // call to `parse_block` will handle it.
 679                Event::Start(Tag::BlockQuote(_kind)) => {
 680                    nested_depth += 1;
 681                }
 682                Event::End(TagEnd::BlockQuote(_kind)) => {
 683                    nested_depth -= 1;
 684                    if nested_depth == 0 {
 685                        self.cursor += 1;
 686                        break;
 687                    }
 688                }
 689                _ => {}
 690            };
 691        }
 692
 693        ParsedMarkdownBlockQuote {
 694            source_range,
 695            children,
 696        }
 697    }
 698
 699    async fn parse_code_block(&mut self, language: Option<String>) -> ParsedMarkdownCodeBlock {
 700        let (_event, source_range) = self.previous().unwrap();
 701        let source_range = source_range.clone();
 702        let mut code = String::new();
 703
 704        while !self.eof() {
 705            let (current, _source_range) = self.current().unwrap();
 706            match current {
 707                Event::Text(text) => {
 708                    code.push_str(text);
 709                    self.cursor += 1;
 710                }
 711                Event::End(TagEnd::CodeBlock) => {
 712                    self.cursor += 1;
 713                    break;
 714                }
 715                _ => {
 716                    break;
 717                }
 718            }
 719        }
 720
 721        code = code.strip_suffix('\n').unwrap_or(&code).to_string();
 722
 723        let highlights = if let Some(language) = &language {
 724            if let Some(registry) = &self.language_registry {
 725                let rope: language::Rope = code.as_str().into();
 726                registry
 727                    .language_for_name_or_extension(language)
 728                    .await
 729                    .map(|l| l.highlight_text(&rope, 0..code.len()))
 730                    .ok()
 731            } else {
 732                None
 733            }
 734        } else {
 735            None
 736        };
 737
 738        ParsedMarkdownCodeBlock {
 739            source_range,
 740            contents: code.into(),
 741            language,
 742            highlights,
 743        }
 744    }
 745}
 746
 747#[cfg(test)]
 748mod tests {
 749    use core::panic;
 750
 751    use super::*;
 752
 753    use ParsedMarkdownListItemType::*;
 754    use gpui::BackgroundExecutor;
 755    use language::{
 756        HighlightId, Language, LanguageConfig, LanguageMatcher, LanguageRegistry, tree_sitter_rust,
 757    };
 758    use pretty_assertions::assert_eq;
 759
 760    async fn parse(input: &str) -> ParsedMarkdown {
 761        parse_markdown(input, None, None).await
 762    }
 763
 764    #[gpui::test]
 765    async fn test_headings() {
 766        let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
 767
 768        assert_eq!(
 769            parsed.children,
 770            vec![
 771                h1(text("Heading one", 2..13), 0..14),
 772                h2(text("Heading two", 17..28), 14..29),
 773                h3(text("Heading three", 33..46), 29..46),
 774            ]
 775        );
 776    }
 777
 778    #[gpui::test]
 779    async fn test_newlines_dont_new_paragraphs() {
 780        let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
 781
 782        assert_eq!(
 783            parsed.children,
 784            vec![p("Some text that is bolded and italicized", 0..46)]
 785        );
 786    }
 787
 788    #[gpui::test]
 789    async fn test_heading_with_paragraph() {
 790        let parsed = parse("# Zed\nThe editor").await;
 791
 792        assert_eq!(
 793            parsed.children,
 794            vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
 795        );
 796    }
 797
 798    #[gpui::test]
 799    async fn test_double_newlines_do_new_paragraphs() {
 800        let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
 801
 802        assert_eq!(
 803            parsed.children,
 804            vec![
 805                p("Some text that is bolded", 0..29),
 806                p("and italicized", 31..47),
 807            ]
 808        );
 809    }
 810
 811    #[gpui::test]
 812    async fn test_bold_italic_text() {
 813        let parsed = parse("Some text **that is bolded** and *italicized*").await;
 814
 815        assert_eq!(
 816            parsed.children,
 817            vec![p("Some text that is bolded and italicized", 0..45)]
 818        );
 819    }
 820
 821    #[gpui::test]
 822    async fn test_nested_bold_strikethrough_text() {
 823        let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
 824
 825        assert_eq!(parsed.children.len(), 1);
 826        assert_eq!(
 827            parsed.children[0],
 828            ParsedMarkdownElement::Paragraph(vec![MarkdownParagraphChunk::Text(
 829                ParsedMarkdownText {
 830                    source_range: 0..35,
 831                    contents: "Some bostrikethroughld text".to_string(),
 832                    highlights: Vec::new(),
 833                    region_ranges: Vec::new(),
 834                    regions: Vec::new(),
 835                }
 836            )])
 837        );
 838
 839        let new_text = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 840            text
 841        } else {
 842            panic!("Expected a paragraph");
 843        };
 844
 845        let paragraph = if let MarkdownParagraphChunk::Text(text) = &new_text[0] {
 846            text
 847        } else {
 848            panic!("Expected a text");
 849        };
 850
 851        assert_eq!(
 852            paragraph.highlights,
 853            vec![
 854                (
 855                    5..7,
 856                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 857                        weight: FontWeight::BOLD,
 858                        ..Default::default()
 859                    }),
 860                ),
 861                (
 862                    7..20,
 863                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 864                        weight: FontWeight::BOLD,
 865                        strikethrough: true,
 866                        ..Default::default()
 867                    }),
 868                ),
 869                (
 870                    20..22,
 871                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 872                        weight: FontWeight::BOLD,
 873                        ..Default::default()
 874                    }),
 875                ),
 876            ]
 877        );
 878    }
 879
 880    #[gpui::test]
 881    async fn test_text_with_inline_html() {
 882        let parsed = parse("This is a paragraph with an inline HTML <sometag>tag</sometag>.").await;
 883
 884        assert_eq!(
 885            parsed.children,
 886            vec![p("This is a paragraph with an inline HTML tag.", 0..63),],
 887        );
 888    }
 889
 890    #[gpui::test]
 891    async fn test_raw_links_detection() {
 892        let parsed = parse("Checkout this https://zed.dev link").await;
 893
 894        assert_eq!(
 895            parsed.children,
 896            vec![p("Checkout this https://zed.dev link", 0..34)]
 897        );
 898    }
 899
 900    #[gpui::test]
 901    async fn test_empty_image() {
 902        let parsed = parse("![]()").await;
 903
 904        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 905            text
 906        } else {
 907            panic!("Expected a paragraph");
 908        };
 909        assert_eq!(paragraph.len(), 0);
 910    }
 911
 912    #[gpui::test]
 913    async fn test_image_links_detection() {
 914        let parsed = parse("![test](https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png)").await;
 915
 916        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 917            text
 918        } else {
 919            panic!("Expected a paragraph");
 920        };
 921        assert_eq!(
 922            paragraph[0],
 923            MarkdownParagraphChunk::Image(Image {
 924                source_range: 0..111,
 925                link: Link::Web {
 926                    url: "https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png".to_string(),
 927                },
 928                alt_text: Some("test".into()),
 929            },)
 930        );
 931    }
 932
 933    #[gpui::test]
 934    async fn test_image_without_alt_text() {
 935        let parsed = parse("![](http://example.com/foo.png)").await;
 936
 937        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 938            text
 939        } else {
 940            panic!("Expected a paragraph");
 941        };
 942        assert_eq!(
 943            paragraph[0],
 944            MarkdownParagraphChunk::Image(Image {
 945                source_range: 0..31,
 946                link: Link::Web {
 947                    url: "http://example.com/foo.png".to_string(),
 948                },
 949                alt_text: None,
 950            },)
 951        );
 952    }
 953
 954    #[gpui::test]
 955    async fn test_image_with_alt_text_containing_formatting() {
 956        let parsed = parse("![foo *bar* baz](http://example.com/foo.png)").await;
 957
 958        let ParsedMarkdownElement::Paragraph(chunks) = &parsed.children[0] else {
 959            panic!("Expected a paragraph");
 960        };
 961        assert_eq!(
 962            chunks,
 963            &[MarkdownParagraphChunk::Image(Image {
 964                source_range: 0..44,
 965                link: Link::Web {
 966                    url: "http://example.com/foo.png".to_string(),
 967                },
 968                alt_text: Some("foo bar baz".into()),
 969            }),],
 970        );
 971    }
 972
 973    #[gpui::test]
 974    async fn test_images_with_text_in_between() {
 975        let parsed = parse(
 976            "![foo](http://example.com/foo.png)\nLorem Ipsum\n![bar](http://example.com/bar.png)",
 977        )
 978        .await;
 979
 980        let chunks = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 981            text
 982        } else {
 983            panic!("Expected a paragraph");
 984        };
 985        assert_eq!(
 986            chunks,
 987            &vec![
 988                MarkdownParagraphChunk::Image(Image {
 989                    source_range: 0..81,
 990                    link: Link::Web {
 991                        url: "http://example.com/foo.png".to_string(),
 992                    },
 993                    alt_text: Some("foo".into()),
 994                }),
 995                MarkdownParagraphChunk::Text(ParsedMarkdownText {
 996                    source_range: 0..81,
 997                    contents: " Lorem Ipsum ".to_string(),
 998                    highlights: Vec::new(),
 999                    region_ranges: Vec::new(),
1000                    regions: Vec::new(),
1001                }),
1002                MarkdownParagraphChunk::Image(Image {
1003                    source_range: 0..81,
1004                    link: Link::Web {
1005                        url: "http://example.com/bar.png".to_string(),
1006                    },
1007                    alt_text: Some("bar".into()),
1008                })
1009            ]
1010        );
1011    }
1012
1013    #[gpui::test]
1014    async fn test_header_only_table() {
1015        let markdown = "\
1016| Header 1 | Header 2 |
1017|----------|----------|
1018
1019Some other content
1020";
1021
1022        let expected_table = table(
1023            0..48,
1024            row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1025            vec![],
1026        );
1027
1028        assert_eq!(
1029            parse(markdown).await.children[0],
1030            ParsedMarkdownElement::Table(expected_table)
1031        );
1032    }
1033
1034    #[gpui::test]
1035    async fn test_basic_table() {
1036        let markdown = "\
1037| Header 1 | Header 2 |
1038|----------|----------|
1039| Cell 1   | Cell 2   |
1040| Cell 3   | Cell 4   |";
1041
1042        let expected_table = table(
1043            0..95,
1044            row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1045            vec![
1046                row(vec![text("Cell 1", 49..59), text("Cell 2", 60..70)]),
1047                row(vec![text("Cell 3", 73..83), text("Cell 4", 84..94)]),
1048            ],
1049        );
1050
1051        assert_eq!(
1052            parse(markdown).await.children[0],
1053            ParsedMarkdownElement::Table(expected_table)
1054        );
1055    }
1056
1057    #[gpui::test]
1058    async fn test_list_basic() {
1059        let parsed = parse(
1060            "\
1061* Item 1
1062* Item 2
1063* Item 3
1064",
1065        )
1066        .await;
1067
1068        assert_eq!(
1069            parsed.children,
1070            vec![
1071                list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1072                list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1073                list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
1074            ],
1075        );
1076    }
1077
1078    #[gpui::test]
1079    async fn test_list_with_tasks() {
1080        let parsed = parse(
1081            "\
1082- [ ] TODO
1083- [x] Checked
1084",
1085        )
1086        .await;
1087
1088        assert_eq!(
1089            parsed.children,
1090            vec![
1091                list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1092                list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
1093            ],
1094        );
1095    }
1096
1097    #[gpui::test]
1098    async fn test_list_with_indented_task() {
1099        let parsed = parse(
1100            "\
1101- [ ] TODO
1102  - [x] Checked
1103  - Unordered
1104  1. Number 1
1105  1. Number 2
11061. Number A
1107",
1108        )
1109        .await;
1110
1111        assert_eq!(
1112            parsed.children,
1113            vec![
1114                list_item(0..12, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1115                list_item(13..26, 2, Task(true, 15..18), vec![p("Checked", 19..26)]),
1116                list_item(29..40, 2, Unordered, vec![p("Unordered", 31..40)]),
1117                list_item(43..54, 2, Ordered(1), vec![p("Number 1", 46..54)]),
1118                list_item(57..68, 2, Ordered(2), vec![p("Number 2", 60..68)]),
1119                list_item(69..80, 1, Ordered(1), vec![p("Number A", 72..80)]),
1120            ],
1121        );
1122    }
1123
1124    #[gpui::test]
1125    async fn test_list_with_linebreak_is_handled_correctly() {
1126        let parsed = parse(
1127            "\
1128- [ ] Task 1
1129
1130- [x] Task 2
1131",
1132        )
1133        .await;
1134
1135        assert_eq!(
1136            parsed.children,
1137            vec![
1138                list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
1139                list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
1140            ],
1141        );
1142    }
1143
1144    #[gpui::test]
1145    async fn test_list_nested() {
1146        let parsed = parse(
1147            "\
1148* Item 1
1149* Item 2
1150* Item 3
1151
11521. Hello
11531. Two
1154   1. Three
11552. Four
11563. Five
1157
1158* First
1159  1. Hello
1160     1. Goodbyte
1161        - Inner
1162        - Inner
1163  2. Goodbyte
1164        - Next item empty
1165        -
1166* Last
1167",
1168        )
1169        .await;
1170
1171        assert_eq!(
1172            parsed.children,
1173            vec![
1174                list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1175                list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1176                list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
1177                list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
1178                list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
1179                list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
1180                list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
1181                list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
1182                list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
1183                list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
1184                list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
1185                list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
1186                list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
1187                list_item(143..159, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
1188                list_item(160..180, 3, Unordered, vec![p("Next item empty", 165..180)]),
1189                list_item(186..190, 3, Unordered, vec![]),
1190                list_item(191..197, 1, Unordered, vec![p("Last", 193..197)]),
1191            ]
1192        );
1193    }
1194
1195    #[gpui::test]
1196    async fn test_list_with_nested_content() {
1197        let parsed = parse(
1198            "\
1199*   This is a list item with two paragraphs.
1200
1201    This is the second paragraph in the list item.
1202",
1203        )
1204        .await;
1205
1206        assert_eq!(
1207            parsed.children,
1208            vec![list_item(
1209                0..96,
1210                1,
1211                Unordered,
1212                vec![
1213                    p("This is a list item with two paragraphs.", 4..44),
1214                    p("This is the second paragraph in the list item.", 50..97)
1215                ],
1216            ),],
1217        );
1218    }
1219
1220    #[gpui::test]
1221    async fn test_list_item_with_inline_html() {
1222        let parsed = parse(
1223            "\
1224*   This is a list item with an inline HTML <sometag>tag</sometag>.
1225",
1226        )
1227        .await;
1228
1229        assert_eq!(
1230            parsed.children,
1231            vec![list_item(
1232                0..67,
1233                1,
1234                Unordered,
1235                vec![p("This is a list item with an inline HTML tag.", 4..44),],
1236            ),],
1237        );
1238    }
1239
1240    #[gpui::test]
1241    async fn test_nested_list_with_paragraph_inside() {
1242        let parsed = parse(
1243            "\
12441. a
1245    1. b
1246        1. c
1247
1248    text
1249
1250    1. d
1251",
1252        )
1253        .await;
1254
1255        assert_eq!(
1256            parsed.children,
1257            vec![
1258                list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
1259                list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
1260                list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
1261                p("text", 32..37),
1262                list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
1263            ],
1264        );
1265    }
1266
1267    #[gpui::test]
1268    async fn test_list_with_leading_text() {
1269        let parsed = parse(
1270            "\
1271* `code`
1272* **bold**
1273* [link](https://example.com)
1274",
1275        )
1276        .await;
1277
1278        assert_eq!(
1279            parsed.children,
1280            vec![
1281                list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
1282                list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
1283                list_item(20..49, 1, Unordered, vec![p("link", 22..49)],),
1284            ],
1285        );
1286    }
1287
1288    #[gpui::test]
1289    async fn test_simple_block_quote() {
1290        let parsed = parse("> Simple block quote with **styled text**").await;
1291
1292        assert_eq!(
1293            parsed.children,
1294            vec![block_quote(
1295                vec![p("Simple block quote with styled text", 2..41)],
1296                0..41
1297            )]
1298        );
1299    }
1300
1301    #[gpui::test]
1302    async fn test_simple_block_quote_with_multiple_lines() {
1303        let parsed = parse(
1304            "\
1305> # Heading
1306> More
1307> text
1308>
1309> More text
1310",
1311        )
1312        .await;
1313
1314        assert_eq!(
1315            parsed.children,
1316            vec![block_quote(
1317                vec![
1318                    h1(text("Heading", 4..11), 2..12),
1319                    p("More text", 14..26),
1320                    p("More text", 30..40)
1321                ],
1322                0..40
1323            )]
1324        );
1325    }
1326
1327    #[gpui::test]
1328    async fn test_nested_block_quote() {
1329        let parsed = parse(
1330            "\
1331> A
1332>
1333> > # B
1334>
1335> C
1336
1337More text
1338",
1339        )
1340        .await;
1341
1342        assert_eq!(
1343            parsed.children,
1344            vec![
1345                block_quote(
1346                    vec![
1347                        p("A", 2..4),
1348                        block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
1349                        p("C", 18..20)
1350                    ],
1351                    0..20
1352                ),
1353                p("More text", 21..31)
1354            ]
1355        );
1356    }
1357
1358    #[gpui::test]
1359    async fn test_code_block() {
1360        let parsed = parse(
1361            "\
1362```
1363fn main() {
1364    return 0;
1365}
1366```
1367",
1368        )
1369        .await;
1370
1371        assert_eq!(
1372            parsed.children,
1373            vec![code_block(
1374                None,
1375                "fn main() {\n    return 0;\n}",
1376                0..35,
1377                None
1378            )]
1379        );
1380    }
1381
1382    #[gpui::test]
1383    async fn test_code_block_with_language(executor: BackgroundExecutor) {
1384        let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
1385        language_registry.add(rust_lang());
1386
1387        let parsed = parse_markdown(
1388            "\
1389```rust
1390fn main() {
1391    return 0;
1392}
1393```
1394",
1395            None,
1396            Some(language_registry),
1397        )
1398        .await;
1399
1400        assert_eq!(
1401            parsed.children,
1402            vec![code_block(
1403                Some("rust".to_string()),
1404                "fn main() {\n    return 0;\n}",
1405                0..39,
1406                Some(vec![])
1407            )]
1408        );
1409    }
1410
1411    fn rust_lang() -> Arc<Language> {
1412        Arc::new(Language::new(
1413            LanguageConfig {
1414                name: "Rust".into(),
1415                matcher: LanguageMatcher {
1416                    path_suffixes: vec!["rs".into()],
1417                    ..Default::default()
1418                },
1419                collapsed_placeholder: " /* ... */ ".to_string(),
1420                ..Default::default()
1421            },
1422            Some(tree_sitter_rust::LANGUAGE.into()),
1423        ))
1424    }
1425
1426    fn h1(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1427        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1428            source_range,
1429            level: HeadingLevel::H1,
1430            contents,
1431        })
1432    }
1433
1434    fn h2(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1435        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1436            source_range,
1437            level: HeadingLevel::H2,
1438            contents,
1439        })
1440    }
1441
1442    fn h3(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1443        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1444            source_range,
1445            level: HeadingLevel::H3,
1446            contents,
1447        })
1448    }
1449
1450    fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
1451        ParsedMarkdownElement::Paragraph(text(contents, source_range))
1452    }
1453
1454    fn text(contents: &str, source_range: Range<usize>) -> MarkdownParagraph {
1455        vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1456            highlights: Vec::new(),
1457            region_ranges: Vec::new(),
1458            regions: Vec::new(),
1459            source_range,
1460            contents: contents.to_string(),
1461        })]
1462    }
1463
1464    fn block_quote(
1465        children: Vec<ParsedMarkdownElement>,
1466        source_range: Range<usize>,
1467    ) -> ParsedMarkdownElement {
1468        ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
1469            source_range,
1470            children,
1471        })
1472    }
1473
1474    fn code_block(
1475        language: Option<String>,
1476        code: &str,
1477        source_range: Range<usize>,
1478        highlights: Option<Vec<(Range<usize>, HighlightId)>>,
1479    ) -> ParsedMarkdownElement {
1480        ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
1481            source_range,
1482            language,
1483            contents: code.to_string().into(),
1484            highlights,
1485        })
1486    }
1487
1488    fn list_item(
1489        source_range: Range<usize>,
1490        depth: u16,
1491        item_type: ParsedMarkdownListItemType,
1492        content: Vec<ParsedMarkdownElement>,
1493    ) -> ParsedMarkdownElement {
1494        ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
1495            source_range,
1496            item_type,
1497            depth,
1498            content,
1499        })
1500    }
1501
1502    fn table(
1503        source_range: Range<usize>,
1504        header: ParsedMarkdownTableRow,
1505        body: Vec<ParsedMarkdownTableRow>,
1506    ) -> ParsedMarkdownTable {
1507        ParsedMarkdownTable {
1508            column_alignments: Vec::new(),
1509            source_range,
1510            header,
1511            body,
1512        }
1513    }
1514
1515    fn row(children: Vec<MarkdownParagraph>) -> ParsedMarkdownTableRow {
1516        ParsedMarkdownTableRow { children }
1517    }
1518
1519    impl PartialEq for ParsedMarkdownTable {
1520        fn eq(&self, other: &Self) -> bool {
1521            self.source_range == other.source_range
1522                && self.header == other.header
1523                && self.body == other.body
1524        }
1525    }
1526
1527    impl PartialEq for ParsedMarkdownText {
1528        fn eq(&self, other: &Self) -> bool {
1529            self.source_range == other.source_range && self.contents == other.contents
1530        }
1531    }
1532}