markdown_parser.rs

   1use crate::markdown_elements::*;
   2use async_recursion::async_recursion;
   3use collections::FxHashMap;
   4use gpui::FontWeight;
   5use language::LanguageRegistry;
   6use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
   7use std::{ops::Range, path::PathBuf, sync::Arc, vec};
   8
   9pub async fn parse_markdown(
  10    markdown_input: &str,
  11    file_location_directory: Option<PathBuf>,
  12    language_registry: Option<Arc<LanguageRegistry>>,
  13) -> ParsedMarkdown {
  14    let mut options = Options::all();
  15    options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
  16
  17    let parser = Parser::new_ext(markdown_input, options);
  18    let parser = MarkdownParser::new(
  19        parser.into_offset_iter().collect(),
  20        file_location_directory,
  21        language_registry,
  22    );
  23    let renderer = parser.parse_document().await;
  24    ParsedMarkdown {
  25        children: renderer.parsed,
  26    }
  27}
  28
  29struct MarkdownParser<'a> {
  30    tokens: Vec<(Event<'a>, Range<usize>)>,
  31    /// The current index in the tokens array
  32    cursor: usize,
  33    /// The blocks that we have successfully parsed so far
  34    parsed: Vec<ParsedMarkdownElement>,
  35    file_location_directory: Option<PathBuf>,
  36    language_registry: Option<Arc<LanguageRegistry>>,
  37}
  38
  39struct MarkdownListItem {
  40    content: Vec<ParsedMarkdownElement>,
  41    item_type: ParsedMarkdownListItemType,
  42}
  43
  44impl Default for MarkdownListItem {
  45    fn default() -> Self {
  46        Self {
  47            content: Vec::new(),
  48            item_type: ParsedMarkdownListItemType::Unordered,
  49        }
  50    }
  51}
  52
  53impl<'a> MarkdownParser<'a> {
  54    fn new(
  55        tokens: Vec<(Event<'a>, Range<usize>)>,
  56        file_location_directory: Option<PathBuf>,
  57        language_registry: Option<Arc<LanguageRegistry>>,
  58    ) -> Self {
  59        Self {
  60            tokens,
  61            file_location_directory,
  62            language_registry,
  63            cursor: 0,
  64            parsed: vec![],
  65        }
  66    }
  67
  68    fn eof(&self) -> bool {
  69        if self.tokens.is_empty() {
  70            return true;
  71        }
  72        self.cursor >= self.tokens.len() - 1
  73    }
  74
  75    fn peek(&self, steps: usize) -> Option<&(Event, Range<usize>)> {
  76        if self.eof() || (steps + self.cursor) >= self.tokens.len() {
  77            return self.tokens.last();
  78        }
  79        return self.tokens.get(self.cursor + steps);
  80    }
  81
  82    fn previous(&self) -> Option<&(Event, Range<usize>)> {
  83        if self.cursor == 0 || self.cursor > self.tokens.len() {
  84            return None;
  85        }
  86        return self.tokens.get(self.cursor - 1);
  87    }
  88
  89    fn current(&self) -> Option<&(Event, Range<usize>)> {
  90        return self.peek(0);
  91    }
  92
  93    fn current_event(&self) -> Option<&Event> {
  94        return self.current().map(|(event, _)| event);
  95    }
  96
  97    fn is_text_like(event: &Event) -> bool {
  98        match event {
  99            Event::Text(_)
 100            // Represent an inline code block
 101            | Event::Code(_)
 102            | Event::Html(_)
 103            | Event::FootnoteReference(_)
 104            | Event::Start(Tag::Link { .. })
 105            | Event::Start(Tag::Emphasis)
 106            | Event::Start(Tag::Strong)
 107            | Event::Start(Tag::Strikethrough)
 108            | Event::Start(Tag::Image { .. }) => {
 109                true
 110            }
 111            _ => false,
 112        }
 113    }
 114
 115    async fn parse_document(mut self) -> Self {
 116        while !self.eof() {
 117            if let Some(block) = self.parse_block().await {
 118                self.parsed.extend(block);
 119            } else {
 120                self.cursor += 1;
 121            }
 122        }
 123        self
 124    }
 125
 126    #[async_recursion]
 127    async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
 128        let (current, source_range) = self.current().unwrap();
 129        let source_range = source_range.clone();
 130        match current {
 131            Event::Start(tag) => match tag {
 132                Tag::Paragraph => {
 133                    self.cursor += 1;
 134                    let text = self.parse_text(false, Some(source_range));
 135                    Some(vec![ParsedMarkdownElement::Paragraph(text)])
 136                }
 137                Tag::Heading { level, .. } => {
 138                    let level = *level;
 139                    self.cursor += 1;
 140                    let heading = self.parse_heading(level);
 141                    Some(vec![ParsedMarkdownElement::Heading(heading)])
 142                }
 143                Tag::Table(alignment) => {
 144                    let alignment = alignment.clone();
 145                    self.cursor += 1;
 146                    let table = self.parse_table(alignment);
 147                    Some(vec![ParsedMarkdownElement::Table(table)])
 148                }
 149                Tag::List(order) => {
 150                    let order = *order;
 151                    self.cursor += 1;
 152                    let list = self.parse_list(order).await;
 153                    Some(list)
 154                }
 155                Tag::BlockQuote(_kind) => {
 156                    self.cursor += 1;
 157                    let block_quote = self.parse_block_quote().await;
 158                    Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
 159                }
 160                Tag::CodeBlock(kind) => {
 161                    let language = match kind {
 162                        pulldown_cmark::CodeBlockKind::Indented => None,
 163                        pulldown_cmark::CodeBlockKind::Fenced(language) => {
 164                            if language.is_empty() {
 165                                None
 166                            } else {
 167                                Some(language.to_string())
 168                            }
 169                        }
 170                    };
 171
 172                    self.cursor += 1;
 173
 174                    let code_block = self.parse_code_block(language).await;
 175                    Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
 176                }
 177                _ => None,
 178            },
 179            Event::Rule => {
 180                let source_range = source_range.clone();
 181                self.cursor += 1;
 182                Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
 183            }
 184            _ => None,
 185        }
 186    }
 187
 188    fn parse_text(
 189        &mut self,
 190        should_complete_on_soft_break: bool,
 191        source_range: Option<Range<usize>>,
 192    ) -> MarkdownParagraph {
 193        let source_range = source_range.unwrap_or_else(|| {
 194            self.current()
 195                .map(|(_, range)| range.clone())
 196                .unwrap_or_default()
 197        });
 198
 199        let mut markdown_text_like = Vec::new();
 200        let mut text = String::new();
 201        let mut bold_depth = 0;
 202        let mut italic_depth = 0;
 203        let mut strikethrough_depth = 0;
 204        let mut link: Option<Link> = None;
 205        let mut image: Option<Image> = None;
 206        let mut region_ranges: Vec<Range<usize>> = vec![];
 207        let mut regions: Vec<ParsedRegion> = vec![];
 208        let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
 209        let mut link_urls: Vec<String> = vec![];
 210        let mut link_ranges: Vec<Range<usize>> = vec![];
 211
 212        loop {
 213            if self.eof() {
 214                break;
 215            }
 216
 217            let (current, _) = self.current().unwrap();
 218            let prev_len = text.len();
 219            match current {
 220                Event::SoftBreak => {
 221                    if should_complete_on_soft_break {
 222                        break;
 223                    }
 224                    text.push(' ');
 225                }
 226
 227                Event::HardBreak => {
 228                    text.push('\n');
 229                }
 230
 231                // We want to ignore any inline HTML tags in the text but keep
 232                // the text between them
 233                Event::InlineHtml(_) => {}
 234
 235                Event::Text(t) => {
 236                    text.push_str(t.as_ref());
 237                    let mut style = MarkdownHighlightStyle::default();
 238
 239                    if bold_depth > 0 {
 240                        style.weight = FontWeight::BOLD;
 241                    }
 242
 243                    if italic_depth > 0 {
 244                        style.italic = true;
 245                    }
 246
 247                    if strikethrough_depth > 0 {
 248                        style.strikethrough = true;
 249                    }
 250
 251                    let last_run_len = if let Some(link) = link.clone() {
 252                        region_ranges.push(prev_len..text.len());
 253                        regions.push(ParsedRegion {
 254                            code: false,
 255                            link: Some(link),
 256                        });
 257                        style.underline = true;
 258                        prev_len
 259                    } else {
 260                        // Manually scan for links
 261                        let mut finder = linkify::LinkFinder::new();
 262                        finder.kinds(&[linkify::LinkKind::Url]);
 263                        let mut last_link_len = prev_len;
 264                        for link in finder.links(t) {
 265                            let start = link.start();
 266                            let end = link.end();
 267                            let range = (prev_len + start)..(prev_len + end);
 268                            link_ranges.push(range.clone());
 269                            link_urls.push(link.as_str().to_string());
 270
 271                            // If there is a style before we match a link, we have to add this to the highlighted ranges
 272                            if style != MarkdownHighlightStyle::default()
 273                                && last_link_len < link.start()
 274                            {
 275                                highlights.push((
 276                                    last_link_len..link.start(),
 277                                    MarkdownHighlight::Style(style.clone()),
 278                                ));
 279                            }
 280
 281                            highlights.push((
 282                                range.clone(),
 283                                MarkdownHighlight::Style(MarkdownHighlightStyle {
 284                                    underline: true,
 285                                    ..style
 286                                }),
 287                            ));
 288                            region_ranges.push(range.clone());
 289                            regions.push(ParsedRegion {
 290                                code: false,
 291                                link: Some(Link::Web {
 292                                    url: link.as_str().to_string(),
 293                                }),
 294                            });
 295                            last_link_len = end;
 296                        }
 297                        last_link_len
 298                    };
 299
 300                    if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
 301                        let mut new_highlight = true;
 302                        if let Some((last_range, last_style)) = highlights.last_mut() {
 303                            if last_range.end == last_run_len
 304                                && last_style == &MarkdownHighlight::Style(style.clone())
 305                            {
 306                                last_range.end = text.len();
 307                                new_highlight = false;
 308                            }
 309                        }
 310                        if new_highlight {
 311                            highlights.push((
 312                                last_run_len..text.len(),
 313                                MarkdownHighlight::Style(style.clone()),
 314                            ));
 315                        }
 316                    }
 317                    if let Some(image) = image.as_mut() {
 318                        text.truncate(text.len() - t.len());
 319                        image.set_alt_text(t.to_string().into());
 320                        if !text.is_empty() {
 321                            let parsed_regions = MarkdownParagraphChunk::Text(ParsedMarkdownText {
 322                                source_range: source_range.clone(),
 323                                contents: text.clone(),
 324                                highlights: highlights.clone(),
 325                                region_ranges: region_ranges.clone(),
 326                                regions: regions.clone(),
 327                            });
 328                            text = String::new();
 329                            highlights = vec![];
 330                            region_ranges = vec![];
 331                            regions = vec![];
 332                            markdown_text_like.push(parsed_regions);
 333                        }
 334
 335                        let parsed_image = MarkdownParagraphChunk::Image(image.clone());
 336                        markdown_text_like.push(parsed_image);
 337                        style = MarkdownHighlightStyle::default();
 338                        style.underline = true;
 339                    }
 340                }
 341                Event::Code(t) => {
 342                    text.push_str(t.as_ref());
 343                    region_ranges.push(prev_len..text.len());
 344
 345                    if link.is_some() {
 346                        highlights.push((
 347                            prev_len..text.len(),
 348                            MarkdownHighlight::Style(MarkdownHighlightStyle {
 349                                underline: true,
 350                                ..Default::default()
 351                            }),
 352                        ));
 353                    }
 354                    regions.push(ParsedRegion {
 355                        code: true,
 356                        link: link.clone(),
 357                    });
 358                }
 359                Event::Start(tag) => match tag {
 360                    Tag::Emphasis => italic_depth += 1,
 361                    Tag::Strong => bold_depth += 1,
 362                    Tag::Strikethrough => strikethrough_depth += 1,
 363                    Tag::Link { dest_url, .. } => {
 364                        link = Link::identify(
 365                            self.file_location_directory.clone(),
 366                            dest_url.to_string(),
 367                        );
 368                    }
 369                    Tag::Image { dest_url, .. } => {
 370                        image = Image::identify(
 371                            dest_url.to_string(),
 372                            source_range.clone(),
 373                            self.file_location_directory.clone(),
 374                        );
 375                    }
 376                    _ => {
 377                        break;
 378                    }
 379                },
 380
 381                Event::End(tag) => match tag {
 382                    TagEnd::Emphasis => italic_depth -= 1,
 383                    TagEnd::Strong => bold_depth -= 1,
 384                    TagEnd::Strikethrough => strikethrough_depth -= 1,
 385                    TagEnd::Link => {
 386                        link = None;
 387                    }
 388                    TagEnd::Image => {
 389                        image = None;
 390                    }
 391                    TagEnd::Paragraph => {
 392                        self.cursor += 1;
 393                        break;
 394                    }
 395                    _ => {
 396                        break;
 397                    }
 398                },
 399                _ => {
 400                    break;
 401                }
 402            }
 403
 404            self.cursor += 1;
 405        }
 406        if !text.is_empty() {
 407            markdown_text_like.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
 408                source_range: source_range.clone(),
 409                contents: text,
 410                highlights,
 411                regions,
 412                region_ranges,
 413            }));
 414        }
 415        markdown_text_like
 416    }
 417
 418    fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
 419        let (_event, source_range) = self.previous().unwrap();
 420        let source_range = source_range.clone();
 421        let text = self.parse_text(true, None);
 422
 423        // Advance past the heading end tag
 424        self.cursor += 1;
 425
 426        ParsedMarkdownHeading {
 427            source_range: source_range.clone(),
 428            level: match level {
 429                pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
 430                pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
 431                pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
 432                pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
 433                pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
 434                pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
 435            },
 436            contents: text,
 437        }
 438    }
 439
 440    fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
 441        let (_event, source_range) = self.previous().unwrap();
 442        let source_range = source_range.clone();
 443        let mut header = ParsedMarkdownTableRow::new();
 444        let mut body = vec![];
 445        let mut current_row = vec![];
 446        let mut in_header = true;
 447        let column_alignments = alignment.iter().map(Self::convert_alignment).collect();
 448
 449        loop {
 450            if self.eof() {
 451                break;
 452            }
 453
 454            let (current, source_range) = self.current().unwrap();
 455            let source_range = source_range.clone();
 456            match current {
 457                Event::Start(Tag::TableHead)
 458                | Event::Start(Tag::TableRow)
 459                | Event::End(TagEnd::TableCell) => {
 460                    self.cursor += 1;
 461                }
 462                Event::Start(Tag::TableCell) => {
 463                    self.cursor += 1;
 464                    let cell_contents = self.parse_text(false, Some(source_range));
 465                    current_row.push(cell_contents);
 466                }
 467                Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
 468                    self.cursor += 1;
 469                    let new_row = std::mem::take(&mut current_row);
 470                    if in_header {
 471                        header.children = new_row;
 472                        in_header = false;
 473                    } else {
 474                        let row = ParsedMarkdownTableRow::with_children(new_row);
 475                        body.push(row);
 476                    }
 477                }
 478                Event::End(TagEnd::Table) => {
 479                    self.cursor += 1;
 480                    break;
 481                }
 482                _ => {
 483                    break;
 484                }
 485            }
 486        }
 487
 488        ParsedMarkdownTable {
 489            source_range,
 490            header,
 491            body,
 492            column_alignments,
 493        }
 494    }
 495
 496    fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
 497        match alignment {
 498            Alignment::None => ParsedMarkdownTableAlignment::None,
 499            Alignment::Left => ParsedMarkdownTableAlignment::Left,
 500            Alignment::Center => ParsedMarkdownTableAlignment::Center,
 501            Alignment::Right => ParsedMarkdownTableAlignment::Right,
 502        }
 503    }
 504
 505    async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
 506        let (_, list_source_range) = self.previous().unwrap();
 507
 508        let mut items = Vec::new();
 509        let mut items_stack = vec![MarkdownListItem::default()];
 510        let mut depth = 1;
 511        let mut order = order;
 512        let mut order_stack = Vec::new();
 513
 514        let mut insertion_indices = FxHashMap::default();
 515        let mut source_ranges = FxHashMap::default();
 516        let mut start_item_range = list_source_range.clone();
 517
 518        while !self.eof() {
 519            let (current, source_range) = self.current().unwrap();
 520            match current {
 521                Event::Start(Tag::List(new_order)) => {
 522                    if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
 523                        insertion_indices.insert(depth, items.len());
 524                    }
 525
 526                    // We will use the start of the nested list as the end for the current item's range,
 527                    // because we don't care about the hierarchy of list items
 528                    if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
 529                        e.insert(start_item_range.start..source_range.start);
 530                    }
 531
 532                    order_stack.push(order);
 533                    order = *new_order;
 534                    self.cursor += 1;
 535                    depth += 1;
 536                }
 537                Event::End(TagEnd::List(_)) => {
 538                    order = order_stack.pop().flatten();
 539                    self.cursor += 1;
 540                    depth -= 1;
 541
 542                    if depth == 0 {
 543                        break;
 544                    }
 545                }
 546                Event::Start(Tag::Item) => {
 547                    start_item_range = source_range.clone();
 548
 549                    self.cursor += 1;
 550                    items_stack.push(MarkdownListItem::default());
 551
 552                    let mut task_list = None;
 553                    // Check for task list marker (`- [ ]` or `- [x]`)
 554                    if let Some(event) = self.current_event() {
 555                        // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
 556                        if event == &Event::Start(Tag::Paragraph) {
 557                            self.cursor += 1;
 558                        }
 559
 560                        if let Some((Event::TaskListMarker(checked), range)) = self.current() {
 561                            task_list = Some((*checked, range.clone()));
 562                            self.cursor += 1;
 563                        }
 564                    }
 565
 566                    if let Some((event, range)) = self.current() {
 567                        // This is a plain list item.
 568                        // For example `- some text` or `1. [Docs](./docs.md)`
 569                        if MarkdownParser::is_text_like(event) {
 570                            let text = self.parse_text(false, Some(range.clone()));
 571                            let block = ParsedMarkdownElement::Paragraph(text);
 572                            if let Some(content) = items_stack.last_mut() {
 573                                let item_type = if let Some((checked, range)) = task_list {
 574                                    ParsedMarkdownListItemType::Task(checked, range)
 575                                } else if let Some(order) = order {
 576                                    ParsedMarkdownListItemType::Ordered(order)
 577                                } else {
 578                                    ParsedMarkdownListItemType::Unordered
 579                                };
 580                                content.item_type = item_type;
 581                                content.content.push(block);
 582                            }
 583                        } else {
 584                            let block = self.parse_block().await;
 585                            if let Some(block) = block {
 586                                if let Some(list_item) = items_stack.last_mut() {
 587                                    list_item.content.extend(block);
 588                                }
 589                            }
 590                        }
 591                    }
 592
 593                    // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
 594                    if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
 595                        self.cursor += 1;
 596                    }
 597                }
 598                Event::End(TagEnd::Item) => {
 599                    self.cursor += 1;
 600
 601                    if let Some(current) = order {
 602                        order = Some(current + 1);
 603                    }
 604
 605                    if let Some(list_item) = items_stack.pop() {
 606                        let source_range = source_ranges
 607                            .remove(&depth)
 608                            .unwrap_or(start_item_range.clone());
 609
 610                        // We need to remove the last character of the source range, because it includes the newline character
 611                        let source_range = source_range.start..source_range.end - 1;
 612                        let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
 613                            source_range,
 614                            content: list_item.content,
 615                            depth,
 616                            item_type: list_item.item_type,
 617                        });
 618
 619                        if let Some(index) = insertion_indices.get(&depth) {
 620                            items.insert(*index, item);
 621                            insertion_indices.remove(&depth);
 622                        } else {
 623                            items.push(item);
 624                        }
 625                    }
 626                }
 627                _ => {
 628                    if depth == 0 {
 629                        break;
 630                    }
 631                    // This can only happen if a list item starts with more then one paragraph,
 632                    // or the list item contains blocks that should be rendered after the nested list items
 633                    let block = self.parse_block().await;
 634                    if let Some(block) = block {
 635                        if let Some(list_item) = items_stack.last_mut() {
 636                            // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
 637                            if !insertion_indices.contains_key(&depth) {
 638                                list_item.content.extend(block);
 639                                continue;
 640                            }
 641                        }
 642
 643                        // Otherwise we need to insert the block after all the nested items
 644                        // that have been parsed so far
 645                        items.extend(block);
 646                    } else {
 647                        self.cursor += 1;
 648                    }
 649                }
 650            }
 651        }
 652
 653        items
 654    }
 655
 656    #[async_recursion]
 657    async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
 658        let (_event, source_range) = self.previous().unwrap();
 659        let source_range = source_range.clone();
 660        let mut nested_depth = 1;
 661
 662        let mut children: Vec<ParsedMarkdownElement> = vec![];
 663
 664        while !self.eof() {
 665            let block = self.parse_block().await;
 666
 667            if let Some(block) = block {
 668                children.extend(block);
 669            } else {
 670                break;
 671            }
 672
 673            if self.eof() {
 674                break;
 675            }
 676
 677            let (current, _source_range) = self.current().unwrap();
 678            match current {
 679                // This is a nested block quote.
 680                // Record that we're in a nested block quote and continue parsing.
 681                // We don't need to advance the cursor since the next
 682                // call to `parse_block` will handle it.
 683                Event::Start(Tag::BlockQuote(_kind)) => {
 684                    nested_depth += 1;
 685                }
 686                Event::End(TagEnd::BlockQuote(_kind)) => {
 687                    nested_depth -= 1;
 688                    if nested_depth == 0 {
 689                        self.cursor += 1;
 690                        break;
 691                    }
 692                }
 693                _ => {}
 694            };
 695        }
 696
 697        ParsedMarkdownBlockQuote {
 698            source_range,
 699            children,
 700        }
 701    }
 702
 703    async fn parse_code_block(&mut self, language: Option<String>) -> ParsedMarkdownCodeBlock {
 704        let (_event, source_range) = self.previous().unwrap();
 705        let source_range = source_range.clone();
 706        let mut code = String::new();
 707
 708        while !self.eof() {
 709            let (current, _source_range) = self.current().unwrap();
 710            match current {
 711                Event::Text(text) => {
 712                    code.push_str(text);
 713                    self.cursor += 1;
 714                }
 715                Event::End(TagEnd::CodeBlock) => {
 716                    self.cursor += 1;
 717                    break;
 718                }
 719                _ => {
 720                    break;
 721                }
 722            }
 723        }
 724        let highlights = if let Some(language) = &language {
 725            if let Some(registry) = &self.language_registry {
 726                let rope: language::Rope = code.as_str().into();
 727                registry
 728                    .language_for_name_or_extension(language)
 729                    .await
 730                    .map(|l| l.highlight_text(&rope, 0..code.len()))
 731                    .ok()
 732            } else {
 733                None
 734            }
 735        } else {
 736            None
 737        };
 738
 739        ParsedMarkdownCodeBlock {
 740            source_range,
 741            contents: code.trim().to_string().into(),
 742            language,
 743            highlights,
 744        }
 745    }
 746}
 747
 748#[cfg(test)]
 749mod tests {
 750    use core::panic;
 751
 752    use super::*;
 753
 754    use gpui::BackgroundExecutor;
 755    use language::{
 756        tree_sitter_rust, HighlightId, Language, LanguageConfig, LanguageMatcher, LanguageRegistry,
 757    };
 758    use pretty_assertions::assert_eq;
 759    use ParsedMarkdownListItemType::*;
 760
 761    async fn parse(input: &str) -> ParsedMarkdown {
 762        parse_markdown(input, None, None).await
 763    }
 764
 765    #[gpui::test]
 766    async fn test_headings() {
 767        let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
 768
 769        assert_eq!(
 770            parsed.children,
 771            vec![
 772                h1(text("Heading one", 2..13), 0..14),
 773                h2(text("Heading two", 17..28), 14..29),
 774                h3(text("Heading three", 33..46), 29..46),
 775            ]
 776        );
 777    }
 778
 779    #[gpui::test]
 780    async fn test_newlines_dont_new_paragraphs() {
 781        let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
 782
 783        assert_eq!(
 784            parsed.children,
 785            vec![p("Some text that is bolded and italicized", 0..46)]
 786        );
 787    }
 788
 789    #[gpui::test]
 790    async fn test_heading_with_paragraph() {
 791        let parsed = parse("# Zed\nThe editor").await;
 792
 793        assert_eq!(
 794            parsed.children,
 795            vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
 796        );
 797    }
 798
 799    #[gpui::test]
 800    async fn test_double_newlines_do_new_paragraphs() {
 801        let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
 802
 803        assert_eq!(
 804            parsed.children,
 805            vec![
 806                p("Some text that is bolded", 0..29),
 807                p("and italicized", 31..47),
 808            ]
 809        );
 810    }
 811
 812    #[gpui::test]
 813    async fn test_bold_italic_text() {
 814        let parsed = parse("Some text **that is bolded** and *italicized*").await;
 815
 816        assert_eq!(
 817            parsed.children,
 818            vec![p("Some text that is bolded and italicized", 0..45)]
 819        );
 820    }
 821
 822    #[gpui::test]
 823    async fn test_nested_bold_strikethrough_text() {
 824        let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
 825
 826        assert_eq!(parsed.children.len(), 1);
 827        assert_eq!(
 828            parsed.children[0],
 829            ParsedMarkdownElement::Paragraph(vec![MarkdownParagraphChunk::Text(
 830                ParsedMarkdownText {
 831                    source_range: 0..35,
 832                    contents: "Some bostrikethroughld text".to_string(),
 833                    highlights: Vec::new(),
 834                    region_ranges: Vec::new(),
 835                    regions: Vec::new(),
 836                }
 837            )])
 838        );
 839
 840        let new_text = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 841            text
 842        } else {
 843            panic!("Expected a paragraph");
 844        };
 845
 846        let paragraph = if let MarkdownParagraphChunk::Text(text) = &new_text[0] {
 847            text
 848        } else {
 849            panic!("Expected a text");
 850        };
 851
 852        assert_eq!(
 853            paragraph.highlights,
 854            vec![
 855                (
 856                    5..7,
 857                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 858                        weight: FontWeight::BOLD,
 859                        ..Default::default()
 860                    }),
 861                ),
 862                (
 863                    7..20,
 864                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 865                        weight: FontWeight::BOLD,
 866                        strikethrough: true,
 867                        ..Default::default()
 868                    }),
 869                ),
 870                (
 871                    20..22,
 872                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 873                        weight: FontWeight::BOLD,
 874                        ..Default::default()
 875                    }),
 876                ),
 877            ]
 878        );
 879    }
 880
 881    #[gpui::test]
 882    async fn test_text_with_inline_html() {
 883        let parsed = parse("This is a paragraph with an inline HTML <sometag>tag</sometag>.").await;
 884
 885        assert_eq!(
 886            parsed.children,
 887            vec![p("This is a paragraph with an inline HTML tag.", 0..63),],
 888        );
 889    }
 890
 891    #[gpui::test]
 892    async fn test_raw_links_detection() {
 893        let parsed = parse("Checkout this https://zed.dev link").await;
 894
 895        assert_eq!(
 896            parsed.children,
 897            vec![p("Checkout this https://zed.dev link", 0..34)]
 898        );
 899    }
 900
 901    #[gpui::test]
 902    async fn test_empty_image() {
 903        let parsed = parse("![]()").await;
 904
 905        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 906            text
 907        } else {
 908            panic!("Expected a paragraph");
 909        };
 910        assert_eq!(paragraph.len(), 0);
 911    }
 912
 913    #[gpui::test]
 914    async fn test_image_links_detection() {
 915        let parsed = parse("![test](https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png)").await;
 916
 917        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 918            text
 919        } else {
 920            panic!("Expected a paragraph");
 921        };
 922        assert_eq!(
 923            paragraph[0],
 924            MarkdownParagraphChunk::Image(Image {
 925                source_range: 0..111,
 926                link: Link::Web {
 927                    url: "https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png".to_string(),
 928                },
 929                alt_text: Some("test".into()),
 930            },)
 931        );
 932    }
 933
 934    #[gpui::test]
 935    async fn test_header_only_table() {
 936        let markdown = "\
 937| Header 1 | Header 2 |
 938|----------|----------|
 939
 940Some other content
 941";
 942
 943        let expected_table = table(
 944            0..48,
 945            row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
 946            vec![],
 947        );
 948
 949        assert_eq!(
 950            parse(markdown).await.children[0],
 951            ParsedMarkdownElement::Table(expected_table)
 952        );
 953    }
 954
 955    #[gpui::test]
 956    async fn test_basic_table() {
 957        let markdown = "\
 958| Header 1 | Header 2 |
 959|----------|----------|
 960| Cell 1   | Cell 2   |
 961| Cell 3   | Cell 4   |";
 962
 963        let expected_table = table(
 964            0..95,
 965            row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
 966            vec![
 967                row(vec![text("Cell 1", 49..59), text("Cell 2", 60..70)]),
 968                row(vec![text("Cell 3", 73..83), text("Cell 4", 84..94)]),
 969            ],
 970        );
 971
 972        assert_eq!(
 973            parse(markdown).await.children[0],
 974            ParsedMarkdownElement::Table(expected_table)
 975        );
 976    }
 977
 978    #[gpui::test]
 979    async fn test_list_basic() {
 980        let parsed = parse(
 981            "\
 982* Item 1
 983* Item 2
 984* Item 3
 985",
 986        )
 987        .await;
 988
 989        assert_eq!(
 990            parsed.children,
 991            vec![
 992                list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
 993                list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
 994                list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
 995            ],
 996        );
 997    }
 998
 999    #[gpui::test]
1000    async fn test_list_with_tasks() {
1001        let parsed = parse(
1002            "\
1003- [ ] TODO
1004- [x] Checked
1005",
1006        )
1007        .await;
1008
1009        assert_eq!(
1010            parsed.children,
1011            vec![
1012                list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1013                list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
1014            ],
1015        );
1016    }
1017
1018    #[gpui::test]
1019    async fn test_list_with_indented_task() {
1020        let parsed = parse(
1021            "\
1022- [ ] TODO
1023  - [x] Checked
1024  - Unordered
1025  1. Number 1
1026  1. Number 2
10271. Number A
1028",
1029        )
1030        .await;
1031
1032        assert_eq!(
1033            parsed.children,
1034            vec![
1035                list_item(0..12, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1036                list_item(13..26, 2, Task(true, 15..18), vec![p("Checked", 19..26)]),
1037                list_item(29..40, 2, Unordered, vec![p("Unordered", 31..40)]),
1038                list_item(43..54, 2, Ordered(1), vec![p("Number 1", 46..54)]),
1039                list_item(57..68, 2, Ordered(2), vec![p("Number 2", 60..68)]),
1040                list_item(69..80, 1, Ordered(1), vec![p("Number A", 72..80)]),
1041            ],
1042        );
1043    }
1044
1045    #[gpui::test]
1046    async fn test_list_with_linebreak_is_handled_correctly() {
1047        let parsed = parse(
1048            "\
1049- [ ] Task 1
1050
1051- [x] Task 2
1052",
1053        )
1054        .await;
1055
1056        assert_eq!(
1057            parsed.children,
1058            vec![
1059                list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
1060                list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
1061            ],
1062        );
1063    }
1064
1065    #[gpui::test]
1066    async fn test_list_nested() {
1067        let parsed = parse(
1068            "\
1069* Item 1
1070* Item 2
1071* Item 3
1072
10731. Hello
10741. Two
1075   1. Three
10762. Four
10773. Five
1078
1079* First
1080  1. Hello
1081     1. Goodbyte
1082        - Inner
1083        - Inner
1084  2. Goodbyte
1085        - Next item empty
1086        -
1087* Last
1088",
1089        )
1090        .await;
1091
1092        assert_eq!(
1093            parsed.children,
1094            vec![
1095                list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1096                list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1097                list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
1098                list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
1099                list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
1100                list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
1101                list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
1102                list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
1103                list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
1104                list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
1105                list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
1106                list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
1107                list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
1108                list_item(143..159, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
1109                list_item(160..180, 3, Unordered, vec![p("Next item empty", 165..180)]),
1110                list_item(186..190, 3, Unordered, vec![]),
1111                list_item(191..197, 1, Unordered, vec![p("Last", 193..197)]),
1112            ]
1113        );
1114    }
1115
1116    #[gpui::test]
1117    async fn test_list_with_nested_content() {
1118        let parsed = parse(
1119            "\
1120*   This is a list item with two paragraphs.
1121
1122    This is the second paragraph in the list item.
1123",
1124        )
1125        .await;
1126
1127        assert_eq!(
1128            parsed.children,
1129            vec![list_item(
1130                0..96,
1131                1,
1132                Unordered,
1133                vec![
1134                    p("This is a list item with two paragraphs.", 4..44),
1135                    p("This is the second paragraph in the list item.", 50..97)
1136                ],
1137            ),],
1138        );
1139    }
1140
1141    #[gpui::test]
1142    async fn test_list_item_with_inline_html() {
1143        let parsed = parse(
1144            "\
1145*   This is a list item with an inline HTML <sometag>tag</sometag>.
1146",
1147        )
1148        .await;
1149
1150        assert_eq!(
1151            parsed.children,
1152            vec![list_item(
1153                0..67,
1154                1,
1155                Unordered,
1156                vec![p("This is a list item with an inline HTML tag.", 4..44),],
1157            ),],
1158        );
1159    }
1160
1161    #[gpui::test]
1162    async fn test_nested_list_with_paragraph_inside() {
1163        let parsed = parse(
1164            "\
11651. a
1166    1. b
1167        1. c
1168
1169    text
1170
1171    1. d
1172",
1173        )
1174        .await;
1175
1176        assert_eq!(
1177            parsed.children,
1178            vec![
1179                list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
1180                list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
1181                list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
1182                p("text", 32..37),
1183                list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
1184            ],
1185        );
1186    }
1187
1188    #[gpui::test]
1189    async fn test_list_with_leading_text() {
1190        let parsed = parse(
1191            "\
1192* `code`
1193* **bold**
1194* [link](https://example.com)
1195",
1196        )
1197        .await;
1198
1199        assert_eq!(
1200            parsed.children,
1201            vec![
1202                list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
1203                list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
1204                list_item(20..49, 1, Unordered, vec![p("link", 22..49)],),
1205            ],
1206        );
1207    }
1208
1209    #[gpui::test]
1210    async fn test_simple_block_quote() {
1211        let parsed = parse("> Simple block quote with **styled text**").await;
1212
1213        assert_eq!(
1214            parsed.children,
1215            vec![block_quote(
1216                vec![p("Simple block quote with styled text", 2..41)],
1217                0..41
1218            )]
1219        );
1220    }
1221
1222    #[gpui::test]
1223    async fn test_simple_block_quote_with_multiple_lines() {
1224        let parsed = parse(
1225            "\
1226> # Heading
1227> More
1228> text
1229>
1230> More text
1231",
1232        )
1233        .await;
1234
1235        assert_eq!(
1236            parsed.children,
1237            vec![block_quote(
1238                vec![
1239                    h1(text("Heading", 4..11), 2..12),
1240                    p("More text", 14..26),
1241                    p("More text", 30..40)
1242                ],
1243                0..40
1244            )]
1245        );
1246    }
1247
1248    #[gpui::test]
1249    async fn test_nested_block_quote() {
1250        let parsed = parse(
1251            "\
1252> A
1253>
1254> > # B
1255>
1256> C
1257
1258More text
1259",
1260        )
1261        .await;
1262
1263        assert_eq!(
1264            parsed.children,
1265            vec![
1266                block_quote(
1267                    vec![
1268                        p("A", 2..4),
1269                        block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
1270                        p("C", 18..20)
1271                    ],
1272                    0..20
1273                ),
1274                p("More text", 21..31)
1275            ]
1276        );
1277    }
1278
1279    #[gpui::test]
1280    async fn test_code_block() {
1281        let parsed = parse(
1282            "\
1283```
1284fn main() {
1285    return 0;
1286}
1287```
1288",
1289        )
1290        .await;
1291
1292        assert_eq!(
1293            parsed.children,
1294            vec![code_block(
1295                None,
1296                "fn main() {\n    return 0;\n}",
1297                0..35,
1298                None
1299            )]
1300        );
1301    }
1302
1303    #[gpui::test]
1304    async fn test_code_block_with_language(executor: BackgroundExecutor) {
1305        let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
1306        language_registry.add(rust_lang());
1307
1308        let parsed = parse_markdown(
1309            "\
1310```rust
1311fn main() {
1312    return 0;
1313}
1314```
1315",
1316            None,
1317            Some(language_registry),
1318        )
1319        .await;
1320
1321        assert_eq!(
1322            parsed.children,
1323            vec![code_block(
1324                Some("rust".to_string()),
1325                "fn main() {\n    return 0;\n}",
1326                0..39,
1327                Some(vec![])
1328            )]
1329        );
1330    }
1331
1332    fn rust_lang() -> Arc<Language> {
1333        Arc::new(Language::new(
1334            LanguageConfig {
1335                name: "Rust".into(),
1336                matcher: LanguageMatcher {
1337                    path_suffixes: vec!["rs".into()],
1338                    ..Default::default()
1339                },
1340                collapsed_placeholder: " /* ... */ ".to_string(),
1341                ..Default::default()
1342            },
1343            Some(tree_sitter_rust::LANGUAGE.into()),
1344        ))
1345    }
1346
1347    fn h1(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1348        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1349            source_range,
1350            level: HeadingLevel::H1,
1351            contents,
1352        })
1353    }
1354
1355    fn h2(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1356        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1357            source_range,
1358            level: HeadingLevel::H2,
1359            contents,
1360        })
1361    }
1362
1363    fn h3(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1364        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1365            source_range,
1366            level: HeadingLevel::H3,
1367            contents,
1368        })
1369    }
1370
1371    fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
1372        ParsedMarkdownElement::Paragraph(text(contents, source_range))
1373    }
1374
1375    fn text(contents: &str, source_range: Range<usize>) -> MarkdownParagraph {
1376        vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1377            highlights: Vec::new(),
1378            region_ranges: Vec::new(),
1379            regions: Vec::new(),
1380            source_range,
1381            contents: contents.to_string(),
1382        })]
1383    }
1384
1385    fn block_quote(
1386        children: Vec<ParsedMarkdownElement>,
1387        source_range: Range<usize>,
1388    ) -> ParsedMarkdownElement {
1389        ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
1390            source_range,
1391            children,
1392        })
1393    }
1394
1395    fn code_block(
1396        language: Option<String>,
1397        code: &str,
1398        source_range: Range<usize>,
1399        highlights: Option<Vec<(Range<usize>, HighlightId)>>,
1400    ) -> ParsedMarkdownElement {
1401        ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
1402            source_range,
1403            language,
1404            contents: code.to_string().into(),
1405            highlights,
1406        })
1407    }
1408
1409    fn list_item(
1410        source_range: Range<usize>,
1411        depth: u16,
1412        item_type: ParsedMarkdownListItemType,
1413        content: Vec<ParsedMarkdownElement>,
1414    ) -> ParsedMarkdownElement {
1415        ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
1416            source_range,
1417            item_type,
1418            depth,
1419            content,
1420        })
1421    }
1422
1423    fn table(
1424        source_range: Range<usize>,
1425        header: ParsedMarkdownTableRow,
1426        body: Vec<ParsedMarkdownTableRow>,
1427    ) -> ParsedMarkdownTable {
1428        ParsedMarkdownTable {
1429            column_alignments: Vec::new(),
1430            source_range,
1431            header,
1432            body,
1433        }
1434    }
1435
1436    fn row(children: Vec<MarkdownParagraph>) -> ParsedMarkdownTableRow {
1437        ParsedMarkdownTableRow { children }
1438    }
1439
1440    impl PartialEq for ParsedMarkdownTable {
1441        fn eq(&self, other: &Self) -> bool {
1442            self.source_range == other.source_range
1443                && self.header == other.header
1444                && self.body == other.body
1445        }
1446    }
1447
1448    impl PartialEq for ParsedMarkdownText {
1449        fn eq(&self, other: &Self) -> bool {
1450            self.source_range == other.source_range && self.contents == other.contents
1451        }
1452    }
1453}