markdown_parser.rs

   1use crate::markdown_elements::*;
   2use async_recursion::async_recursion;
   3use collections::FxHashMap;
   4use gpui::FontWeight;
   5use language::LanguageRegistry;
   6use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
   7use std::{ops::Range, path::PathBuf, sync::Arc, vec};
   8
   9pub async fn parse_markdown(
  10    markdown_input: &str,
  11    file_location_directory: Option<PathBuf>,
  12    language_registry: Option<Arc<LanguageRegistry>>,
  13) -> ParsedMarkdown {
  14    let mut options = Options::all();
  15    options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
  16
  17    let parser = Parser::new_ext(markdown_input, options);
  18    let parser = MarkdownParser::new(
  19        parser.into_offset_iter().collect(),
  20        file_location_directory,
  21        language_registry,
  22    );
  23    let renderer = parser.parse_document().await;
  24    ParsedMarkdown {
  25        children: renderer.parsed,
  26    }
  27}
  28
/// Cursor-based parser that turns a pre-collected pulldown-cmark event stream
/// into `ParsedMarkdownElement`s.
struct MarkdownParser<'a> {
    /// Every event of the document paired with its byte range in the source
    tokens: Vec<(Event<'a>, Range<usize>)>,
    /// The current index in the tokens array
    cursor: usize,
    /// The blocks that we have successfully parsed so far
    parsed: Vec<ParsedMarkdownElement>,
    /// Passed to `Link::identify` / `Image::identify` when links and images are parsed
    file_location_directory: Option<PathBuf>,
    /// Used to look up a language for highlighting code blocks that name one
    language_registry: Option<Arc<LanguageRegistry>>,
}
  38
/// Scratch state for a list item while `parse_list` is still reading it.
struct MarkdownListItem {
    /// Blocks collected for the item so far
    content: Vec<ParsedMarkdownElement>,
    /// Kind of marker the item carries (unordered, ordered or task)
    item_type: ParsedMarkdownListItemType,
}
  43
  44impl Default for MarkdownListItem {
  45    fn default() -> Self {
  46        Self {
  47            content: Vec::new(),
  48            item_type: ParsedMarkdownListItemType::Unordered,
  49        }
  50    }
  51}
  52
  53impl<'a> MarkdownParser<'a> {
  54    fn new(
  55        tokens: Vec<(Event<'a>, Range<usize>)>,
  56        file_location_directory: Option<PathBuf>,
  57        language_registry: Option<Arc<LanguageRegistry>>,
  58    ) -> Self {
  59        Self {
  60            tokens,
  61            file_location_directory,
  62            language_registry,
  63            cursor: 0,
  64            parsed: vec![],
  65        }
  66    }
  67
  68    fn eof(&self) -> bool {
  69        if self.tokens.is_empty() {
  70            return true;
  71        }
  72        self.cursor >= self.tokens.len() - 1
  73    }
  74
  75    fn peek(&self, steps: usize) -> Option<&(Event, Range<usize>)> {
  76        if self.eof() || (steps + self.cursor) >= self.tokens.len() {
  77            return self.tokens.last();
  78        }
  79        return self.tokens.get(self.cursor + steps);
  80    }
  81
  82    fn previous(&self) -> Option<&(Event, Range<usize>)> {
  83        if self.cursor == 0 || self.cursor > self.tokens.len() {
  84            return None;
  85        }
  86        return self.tokens.get(self.cursor - 1);
  87    }
  88
  89    fn current(&self) -> Option<&(Event, Range<usize>)> {
  90        return self.peek(0);
  91    }
  92
  93    fn current_event(&self) -> Option<&Event> {
  94        return self.current().map(|(event, _)| event);
  95    }
  96
  97    fn is_text_like(event: &Event) -> bool {
  98        match event {
  99            Event::Text(_)
 100            // Represent an inline code block
 101            | Event::Code(_)
 102            | Event::Html(_)
 103            | Event::FootnoteReference(_)
 104            | Event::Start(Tag::Link { .. })
 105            | Event::Start(Tag::Emphasis)
 106            | Event::Start(Tag::Strong)
 107            | Event::Start(Tag::Strikethrough)
 108            | Event::Start(Tag::Image { .. }) => {
 109                true
 110            }
 111            _ => false,
 112        }
 113    }
 114
 115    async fn parse_document(mut self) -> Self {
 116        while !self.eof() {
 117            if let Some(block) = self.parse_block().await {
 118                self.parsed.extend(block);
 119            } else {
 120                self.cursor += 1;
 121            }
 122        }
 123        self
 124    }
 125
 126    #[async_recursion]
 127    async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
 128        let (current, source_range) = self.current().unwrap();
 129        let source_range = source_range.clone();
 130        match current {
 131            Event::Start(tag) => match tag {
 132                Tag::Paragraph => {
 133                    self.cursor += 1;
 134                    let text = self.parse_text(false, Some(source_range));
 135                    Some(vec![ParsedMarkdownElement::Paragraph(text)])
 136                }
 137                Tag::Heading { level, .. } => {
 138                    let level = *level;
 139                    self.cursor += 1;
 140                    let heading = self.parse_heading(level);
 141                    Some(vec![ParsedMarkdownElement::Heading(heading)])
 142                }
 143                Tag::Table(alignment) => {
 144                    let alignment = alignment.clone();
 145                    self.cursor += 1;
 146                    let table = self.parse_table(alignment);
 147                    Some(vec![ParsedMarkdownElement::Table(table)])
 148                }
 149                Tag::List(order) => {
 150                    let order = *order;
 151                    self.cursor += 1;
 152                    let list = self.parse_list(order).await;
 153                    Some(list)
 154                }
 155                Tag::BlockQuote(_kind) => {
 156                    self.cursor += 1;
 157                    let block_quote = self.parse_block_quote().await;
 158                    Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
 159                }
 160                Tag::CodeBlock(kind) => {
 161                    let language = match kind {
 162                        pulldown_cmark::CodeBlockKind::Indented => None,
 163                        pulldown_cmark::CodeBlockKind::Fenced(language) => {
 164                            if language.is_empty() {
 165                                None
 166                            } else {
 167                                Some(language.to_string())
 168                            }
 169                        }
 170                    };
 171
 172                    self.cursor += 1;
 173
 174                    let code_block = self.parse_code_block(language).await;
 175                    Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
 176                }
 177                _ => None,
 178            },
 179            Event::Rule => {
 180                let source_range = source_range.clone();
 181                self.cursor += 1;
 182                Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
 183            }
 184            _ => None,
 185        }
 186    }
 187
 188    fn parse_text(
 189        &mut self,
 190        should_complete_on_soft_break: bool,
 191        source_range: Option<Range<usize>>,
 192    ) -> MarkdownParagraph {
 193        let source_range = source_range.unwrap_or_else(|| {
 194            self.current()
 195                .map(|(_, range)| range.clone())
 196                .unwrap_or_default()
 197        });
 198
 199        let mut markdown_text_like = Vec::new();
 200        let mut text = String::new();
 201        let mut bold_depth = 0;
 202        let mut italic_depth = 0;
 203        let mut strikethrough_depth = 0;
 204        let mut link: Option<Link> = None;
 205        let mut image: Option<Image> = None;
 206        let mut region_ranges: Vec<Range<usize>> = vec![];
 207        let mut regions: Vec<ParsedRegion> = vec![];
 208        let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
 209        let mut link_urls: Vec<String> = vec![];
 210        let mut link_ranges: Vec<Range<usize>> = vec![];
 211
 212        loop {
 213            if self.eof() {
 214                break;
 215            }
 216
 217            let (current, _source_range) = self.current().unwrap();
 218            let prev_len = text.len();
 219            match current {
 220                Event::SoftBreak => {
 221                    if should_complete_on_soft_break {
 222                        break;
 223                    }
 224                    text.push(' ');
 225                }
 226
 227                Event::HardBreak => {
 228                    text.push('\n');
 229                }
 230
 231                // We want to ignore any inline HTML tags in the text but keep
 232                // the text between them
 233                Event::InlineHtml(_) => {}
 234
 235                Event::Text(t) => {
 236                    text.push_str(t.as_ref());
 237                    let mut style = MarkdownHighlightStyle::default();
 238
 239                    if bold_depth > 0 {
 240                        style.weight = FontWeight::BOLD;
 241                    }
 242
 243                    if italic_depth > 0 {
 244                        style.italic = true;
 245                    }
 246
 247                    if strikethrough_depth > 0 {
 248                        style.strikethrough = true;
 249                    }
 250
 251                    let last_run_len = if let Some(link) = link.clone() {
 252                        region_ranges.push(prev_len..text.len());
 253                        regions.push(ParsedRegion {
 254                            code: false,
 255                            link: Some(link),
 256                        });
 257                        style.underline = true;
 258                        prev_len
 259                    } else {
 260                        // Manually scan for links
 261                        let mut finder = linkify::LinkFinder::new();
 262                        finder.kinds(&[linkify::LinkKind::Url]);
 263                        let mut last_link_len = prev_len;
 264                        for link in finder.links(t) {
 265                            let start = link.start();
 266                            let end = link.end();
 267                            let range = (prev_len + start)..(prev_len + end);
 268                            link_ranges.push(range.clone());
 269                            link_urls.push(link.as_str().to_string());
 270
 271                            // If there is a style before we match a link, we have to add this to the highlighted ranges
 272                            if style != MarkdownHighlightStyle::default()
 273                                && last_link_len < link.start()
 274                            {
 275                                highlights.push((
 276                                    last_link_len..link.start(),
 277                                    MarkdownHighlight::Style(style.clone()),
 278                                ));
 279                            }
 280
 281                            highlights.push((
 282                                range.clone(),
 283                                MarkdownHighlight::Style(MarkdownHighlightStyle {
 284                                    underline: true,
 285                                    ..style
 286                                }),
 287                            ));
 288                            region_ranges.push(range.clone());
 289                            regions.push(ParsedRegion {
 290                                code: false,
 291                                link: Some(Link::Web {
 292                                    url: link.as_str().to_string(),
 293                                }),
 294                            });
 295                            last_link_len = end;
 296                        }
 297                        last_link_len
 298                    };
 299
 300                    if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
 301                        let mut new_highlight = true;
 302                        if let Some((last_range, last_style)) = highlights.last_mut() {
 303                            if last_range.end == last_run_len
 304                                && last_style == &MarkdownHighlight::Style(style.clone())
 305                            {
 306                                last_range.end = text.len();
 307                                new_highlight = false;
 308                            }
 309                        }
 310                        if new_highlight {
 311                            highlights.push((
 312                                last_run_len..text.len(),
 313                                MarkdownHighlight::Style(style.clone()),
 314                            ));
 315                        }
 316                    }
 317                    if let Some(mut image) = image.clone() {
 318                        let is_valid_image = match image.clone() {
 319                            Image::Path { display_path, .. } => {
 320                                gpui::ImageSource::try_from(display_path).is_ok()
 321                            }
 322                            Image::Web { url, .. } => gpui::ImageSource::try_from(url).is_ok(),
 323                        };
 324                        if is_valid_image {
 325                            text.truncate(text.len() - t.len());
 326                            if !t.is_empty() {
 327                                let alt_text = ParsedMarkdownText {
 328                                    source_range: source_range.clone(),
 329                                    contents: t.to_string(),
 330                                    highlights: highlights.clone(),
 331                                    region_ranges: region_ranges.clone(),
 332                                    regions: regions.clone(),
 333                                };
 334                                image = image.with_alt_text(alt_text);
 335                            } else {
 336                                let alt_text = ParsedMarkdownText {
 337                                    source_range: source_range.clone(),
 338                                    contents: "img".to_string(),
 339                                    highlights: highlights.clone(),
 340                                    region_ranges: region_ranges.clone(),
 341                                    regions: regions.clone(),
 342                                };
 343                                image = image.with_alt_text(alt_text);
 344                            }
 345                            if !text.is_empty() {
 346                                let parsed_regions =
 347                                    MarkdownParagraphChunk::Text(ParsedMarkdownText {
 348                                        source_range: source_range.clone(),
 349                                        contents: text.clone(),
 350                                        highlights: highlights.clone(),
 351                                        region_ranges: region_ranges.clone(),
 352                                        regions: regions.clone(),
 353                                    });
 354                                text = String::new();
 355                                highlights = vec![];
 356                                region_ranges = vec![];
 357                                regions = vec![];
 358                                markdown_text_like.push(parsed_regions);
 359                            }
 360
 361                            let parsed_image = MarkdownParagraphChunk::Image(image.clone());
 362                            markdown_text_like.push(parsed_image);
 363                            style = MarkdownHighlightStyle::default();
 364                        }
 365                        style.underline = true;
 366                    };
 367                }
 368                Event::Code(t) => {
 369                    text.push_str(t.as_ref());
 370                    region_ranges.push(prev_len..text.len());
 371
 372                    if link.is_some() {
 373                        highlights.push((
 374                            prev_len..text.len(),
 375                            MarkdownHighlight::Style(MarkdownHighlightStyle {
 376                                underline: true,
 377                                ..Default::default()
 378                            }),
 379                        ));
 380                    }
 381                    regions.push(ParsedRegion {
 382                        code: true,
 383                        link: link.clone(),
 384                    });
 385                }
 386                Event::Start(tag) => match tag {
 387                    Tag::Emphasis => italic_depth += 1,
 388                    Tag::Strong => bold_depth += 1,
 389                    Tag::Strikethrough => strikethrough_depth += 1,
 390                    Tag::Link { dest_url, .. } => {
 391                        link = Link::identify(
 392                            self.file_location_directory.clone(),
 393                            dest_url.to_string(),
 394                        );
 395                    }
 396                    Tag::Image { dest_url, .. } => {
 397                        image = Image::identify(
 398                            source_range.clone(),
 399                            self.file_location_directory.clone(),
 400                            dest_url.to_string(),
 401                            link.clone(),
 402                        );
 403                    }
 404                    _ => {
 405                        break;
 406                    }
 407                },
 408
 409                Event::End(tag) => match tag {
 410                    TagEnd::Emphasis => italic_depth -= 1,
 411                    TagEnd::Strong => bold_depth -= 1,
 412                    TagEnd::Strikethrough => strikethrough_depth -= 1,
 413                    TagEnd::Link => {
 414                        link = None;
 415                    }
 416                    TagEnd::Image => {
 417                        image = None;
 418                    }
 419                    TagEnd::Paragraph => {
 420                        self.cursor += 1;
 421                        break;
 422                    }
 423                    _ => {
 424                        break;
 425                    }
 426                },
 427                _ => {
 428                    break;
 429                }
 430            }
 431
 432            self.cursor += 1;
 433        }
 434        if !text.is_empty() {
 435            markdown_text_like.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
 436                source_range: source_range.clone(),
 437                contents: text,
 438                highlights,
 439                regions,
 440                region_ranges,
 441            }));
 442        }
 443        markdown_text_like
 444    }
 445
 446    fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
 447        let (_event, source_range) = self.previous().unwrap();
 448        let source_range = source_range.clone();
 449        let text = self.parse_text(true, None);
 450
 451        // Advance past the heading end tag
 452        self.cursor += 1;
 453
 454        ParsedMarkdownHeading {
 455            source_range: source_range.clone(),
 456            level: match level {
 457                pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
 458                pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
 459                pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
 460                pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
 461                pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
 462                pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
 463            },
 464            contents: text,
 465        }
 466    }
 467
 468    fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
 469        let (_event, source_range) = self.previous().unwrap();
 470        let source_range = source_range.clone();
 471        let mut header = ParsedMarkdownTableRow::new();
 472        let mut body = vec![];
 473        let mut current_row = vec![];
 474        let mut in_header = true;
 475        let column_alignments = alignment.iter().map(Self::convert_alignment).collect();
 476
 477        loop {
 478            if self.eof() {
 479                break;
 480            }
 481
 482            let (current, source_range) = self.current().unwrap();
 483            let source_range = source_range.clone();
 484            match current {
 485                Event::Start(Tag::TableHead)
 486                | Event::Start(Tag::TableRow)
 487                | Event::End(TagEnd::TableCell) => {
 488                    self.cursor += 1;
 489                }
 490                Event::Start(Tag::TableCell) => {
 491                    self.cursor += 1;
 492                    let cell_contents = self.parse_text(false, Some(source_range));
 493                    current_row.push(cell_contents);
 494                }
 495                Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
 496                    self.cursor += 1;
 497                    let new_row = std::mem::take(&mut current_row);
 498                    if in_header {
 499                        header.children = new_row;
 500                        in_header = false;
 501                    } else {
 502                        let row = ParsedMarkdownTableRow::with_children(new_row);
 503                        body.push(row);
 504                    }
 505                }
 506                Event::End(TagEnd::Table) => {
 507                    self.cursor += 1;
 508                    break;
 509                }
 510                _ => {
 511                    break;
 512                }
 513            }
 514        }
 515
 516        ParsedMarkdownTable {
 517            source_range,
 518            header,
 519            body,
 520            column_alignments,
 521        }
 522    }
 523
 524    fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
 525        match alignment {
 526            Alignment::None => ParsedMarkdownTableAlignment::None,
 527            Alignment::Left => ParsedMarkdownTableAlignment::Left,
 528            Alignment::Center => ParsedMarkdownTableAlignment::Center,
 529            Alignment::Right => ParsedMarkdownTableAlignment::Right,
 530        }
 531    }
 532
 533    async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
 534        let (_, list_source_range) = self.previous().unwrap();
 535
 536        let mut items = Vec::new();
 537        let mut items_stack = vec![MarkdownListItem::default()];
 538        let mut depth = 1;
 539        let mut order = order;
 540        let mut order_stack = Vec::new();
 541
 542        let mut insertion_indices = FxHashMap::default();
 543        let mut source_ranges = FxHashMap::default();
 544        let mut start_item_range = list_source_range.clone();
 545
 546        while !self.eof() {
 547            let (current, source_range) = self.current().unwrap();
 548            match current {
 549                Event::Start(Tag::List(new_order)) => {
 550                    if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
 551                        insertion_indices.insert(depth, items.len());
 552                    }
 553
 554                    // We will use the start of the nested list as the end for the current item's range,
 555                    // because we don't care about the hierarchy of list items
 556                    if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
 557                        e.insert(start_item_range.start..source_range.start);
 558                    }
 559
 560                    order_stack.push(order);
 561                    order = *new_order;
 562                    self.cursor += 1;
 563                    depth += 1;
 564                }
 565                Event::End(TagEnd::List(_)) => {
 566                    order = order_stack.pop().flatten();
 567                    self.cursor += 1;
 568                    depth -= 1;
 569
 570                    if depth == 0 {
 571                        break;
 572                    }
 573                }
 574                Event::Start(Tag::Item) => {
 575                    start_item_range = source_range.clone();
 576
 577                    self.cursor += 1;
 578                    items_stack.push(MarkdownListItem::default());
 579
 580                    let mut task_list = None;
 581                    // Check for task list marker (`- [ ]` or `- [x]`)
 582                    if let Some(event) = self.current_event() {
 583                        // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
 584                        if event == &Event::Start(Tag::Paragraph) {
 585                            self.cursor += 1;
 586                        }
 587
 588                        if let Some((Event::TaskListMarker(checked), range)) = self.current() {
 589                            task_list = Some((*checked, range.clone()));
 590                            self.cursor += 1;
 591                        }
 592                    }
 593
 594                    if let Some((event, range)) = self.current() {
 595                        // This is a plain list item.
 596                        // For example `- some text` or `1. [Docs](./docs.md)`
 597                        if MarkdownParser::is_text_like(event) {
 598                            let text = self.parse_text(false, Some(range.clone()));
 599                            let block = ParsedMarkdownElement::Paragraph(text);
 600                            if let Some(content) = items_stack.last_mut() {
 601                                let item_type = if let Some((checked, range)) = task_list {
 602                                    ParsedMarkdownListItemType::Task(checked, range)
 603                                } else if let Some(order) = order {
 604                                    ParsedMarkdownListItemType::Ordered(order)
 605                                } else {
 606                                    ParsedMarkdownListItemType::Unordered
 607                                };
 608                                content.item_type = item_type;
 609                                content.content.push(block);
 610                            }
 611                        } else {
 612                            let block = self.parse_block().await;
 613                            if let Some(block) = block {
 614                                if let Some(list_item) = items_stack.last_mut() {
 615                                    list_item.content.extend(block);
 616                                }
 617                            }
 618                        }
 619                    }
 620
 621                    // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
 622                    if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
 623                        self.cursor += 1;
 624                    }
 625                }
 626                Event::End(TagEnd::Item) => {
 627                    self.cursor += 1;
 628
 629                    if let Some(current) = order {
 630                        order = Some(current + 1);
 631                    }
 632
 633                    if let Some(list_item) = items_stack.pop() {
 634                        let source_range = source_ranges
 635                            .remove(&depth)
 636                            .unwrap_or(start_item_range.clone());
 637
 638                        // We need to remove the last character of the source range, because it includes the newline character
 639                        let source_range = source_range.start..source_range.end - 1;
 640                        let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
 641                            source_range,
 642                            content: list_item.content,
 643                            depth,
 644                            item_type: list_item.item_type,
 645                        });
 646
 647                        if let Some(index) = insertion_indices.get(&depth) {
 648                            items.insert(*index, item);
 649                            insertion_indices.remove(&depth);
 650                        } else {
 651                            items.push(item);
 652                        }
 653                    }
 654                }
 655                _ => {
 656                    if depth == 0 {
 657                        break;
 658                    }
 659                    // This can only happen if a list item starts with more then one paragraph,
 660                    // or the list item contains blocks that should be rendered after the nested list items
 661                    let block = self.parse_block().await;
 662                    if let Some(block) = block {
 663                        if let Some(list_item) = items_stack.last_mut() {
 664                            // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
 665                            if !insertion_indices.contains_key(&depth) {
 666                                list_item.content.extend(block);
 667                                continue;
 668                            }
 669                        }
 670
 671                        // Otherwise we need to insert the block after all the nested items
 672                        // that have been parsed so far
 673                        items.extend(block);
 674                    } else {
 675                        self.cursor += 1;
 676                    }
 677                }
 678            }
 679        }
 680
 681        items
 682    }
 683
 684    #[async_recursion]
 685    async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
 686        let (_event, source_range) = self.previous().unwrap();
 687        let source_range = source_range.clone();
 688        let mut nested_depth = 1;
 689
 690        let mut children: Vec<ParsedMarkdownElement> = vec![];
 691
 692        while !self.eof() {
 693            let block = self.parse_block().await;
 694
 695            if let Some(block) = block {
 696                children.extend(block);
 697            } else {
 698                break;
 699            }
 700
 701            if self.eof() {
 702                break;
 703            }
 704
 705            let (current, _source_range) = self.current().unwrap();
 706            match current {
 707                // This is a nested block quote.
 708                // Record that we're in a nested block quote and continue parsing.
 709                // We don't need to advance the cursor since the next
 710                // call to `parse_block` will handle it.
 711                Event::Start(Tag::BlockQuote(_kind)) => {
 712                    nested_depth += 1;
 713                }
 714                Event::End(TagEnd::BlockQuote(_kind)) => {
 715                    nested_depth -= 1;
 716                    if nested_depth == 0 {
 717                        self.cursor += 1;
 718                        break;
 719                    }
 720                }
 721                _ => {}
 722            };
 723        }
 724
 725        ParsedMarkdownBlockQuote {
 726            source_range,
 727            children,
 728        }
 729    }
 730
 731    async fn parse_code_block(&mut self, language: Option<String>) -> ParsedMarkdownCodeBlock {
 732        let (_event, source_range) = self.previous().unwrap();
 733        let source_range = source_range.clone();
 734        let mut code = String::new();
 735
 736        while !self.eof() {
 737            let (current, _source_range) = self.current().unwrap();
 738            match current {
 739                Event::Text(text) => {
 740                    code.push_str(text);
 741                    self.cursor += 1;
 742                }
 743                Event::End(TagEnd::CodeBlock) => {
 744                    self.cursor += 1;
 745                    break;
 746                }
 747                _ => {
 748                    break;
 749                }
 750            }
 751        }
 752        let highlights = if let Some(language) = &language {
 753            if let Some(registry) = &self.language_registry {
 754                let rope: language::Rope = code.as_str().into();
 755                registry
 756                    .language_for_name_or_extension(language)
 757                    .await
 758                    .map(|l| l.highlight_text(&rope, 0..code.len()))
 759                    .ok()
 760            } else {
 761                None
 762            }
 763        } else {
 764            None
 765        };
 766
 767        ParsedMarkdownCodeBlock {
 768            source_range,
 769            contents: code.trim().to_string().into(),
 770            language,
 771            highlights,
 772        }
 773    }
 774}
 775
 776#[cfg(test)]
 777mod tests {
 778    use core::panic;
 779
 780    use super::*;
 781
 782    use gpui::BackgroundExecutor;
 783    use language::{
 784        tree_sitter_rust, HighlightId, Language, LanguageConfig, LanguageMatcher, LanguageRegistry,
 785    };
 786    use pretty_assertions::assert_eq;
 787    use ParsedMarkdownListItemType::*;
 788
 789    async fn parse(input: &str) -> ParsedMarkdown {
 790        parse_markdown(input, None, None).await
 791    }
 792
 793    #[gpui::test]
 794    async fn test_headings() {
 795        let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
 796
 797        assert_eq!(
 798            parsed.children,
 799            vec![
 800                h1(text("Heading one", 2..13), 0..14),
 801                h2(text("Heading two", 17..28), 14..29),
 802                h3(text("Heading three", 33..46), 29..46),
 803            ]
 804        );
 805    }
 806
 807    #[gpui::test]
 808    async fn test_newlines_dont_new_paragraphs() {
 809        let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
 810
 811        assert_eq!(
 812            parsed.children,
 813            vec![p("Some text that is bolded and italicized", 0..46)]
 814        );
 815    }
 816
 817    #[gpui::test]
 818    async fn test_heading_with_paragraph() {
 819        let parsed = parse("# Zed\nThe editor").await;
 820
 821        assert_eq!(
 822            parsed.children,
 823            vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
 824        );
 825    }
 826
 827    #[gpui::test]
 828    async fn test_double_newlines_do_new_paragraphs() {
 829        let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
 830
 831        assert_eq!(
 832            parsed.children,
 833            vec![
 834                p("Some text that is bolded", 0..29),
 835                p("and italicized", 31..47),
 836            ]
 837        );
 838    }
 839
 840    #[gpui::test]
 841    async fn test_bold_italic_text() {
 842        let parsed = parse("Some text **that is bolded** and *italicized*").await;
 843
 844        assert_eq!(
 845            parsed.children,
 846            vec![p("Some text that is bolded and italicized", 0..45)]
 847        );
 848    }
 849
 850    #[gpui::test]
 851    async fn test_nested_bold_strikethrough_text() {
 852        let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
 853
 854        assert_eq!(parsed.children.len(), 1);
 855        assert_eq!(
 856            parsed.children[0],
 857            ParsedMarkdownElement::Paragraph(vec![MarkdownParagraphChunk::Text(
 858                ParsedMarkdownText {
 859                    source_range: 0..35,
 860                    contents: "Some bostrikethroughld text".to_string(),
 861                    highlights: Vec::new(),
 862                    region_ranges: Vec::new(),
 863                    regions: Vec::new(),
 864                }
 865            )])
 866        );
 867
 868        let new_text = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 869            text
 870        } else {
 871            panic!("Expected a paragraph");
 872        };
 873
 874        let paragraph = if let MarkdownParagraphChunk::Text(text) = &new_text[0] {
 875            text
 876        } else {
 877            panic!("Expected a text");
 878        };
 879
 880        assert_eq!(
 881            paragraph.highlights,
 882            vec![
 883                (
 884                    5..7,
 885                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 886                        weight: FontWeight::BOLD,
 887                        ..Default::default()
 888                    }),
 889                ),
 890                (
 891                    7..20,
 892                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 893                        weight: FontWeight::BOLD,
 894                        strikethrough: true,
 895                        ..Default::default()
 896                    }),
 897                ),
 898                (
 899                    20..22,
 900                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 901                        weight: FontWeight::BOLD,
 902                        ..Default::default()
 903                    }),
 904                ),
 905            ]
 906        );
 907    }
 908
 909    #[gpui::test]
 910    async fn test_text_with_inline_html() {
 911        let parsed = parse("This is a paragraph with an inline HTML <sometag>tag</sometag>.").await;
 912
 913        assert_eq!(
 914            parsed.children,
 915            vec![p("This is a paragraph with an inline HTML tag.", 0..63),],
 916        );
 917    }
 918
 919    #[gpui::test]
 920    async fn test_raw_links_detection() {
 921        let parsed = parse("Checkout this https://zed.dev link").await;
 922
 923        assert_eq!(
 924            parsed.children,
 925            vec![p("Checkout this https://zed.dev link", 0..34)]
 926        );
 927    }
 928
 929    #[gpui::test]
 930    async fn test_image_links_detection() {
 931        let parsed = parse("![test](https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png)").await;
 932
 933        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 934            text
 935        } else {
 936            panic!("Expected a paragraph");
 937        };
 938        assert_eq!(
 939            paragraph[0],
 940            MarkdownParagraphChunk::Image(Image::Web {
 941                source_range: 0..111,
 942                url: "https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png".to_string(),
 943                link: None,
 944                alt_text: Some(
 945                        ParsedMarkdownText {
 946                        source_range: 0..111,
 947                       contents: "test".to_string(),
 948                       highlights: vec![],
 949                     region_ranges: vec![],
 950                      regions: vec![],
 951                 },
 952                  ),
 953            },)
 954        );
 955    }
 956
 957    #[gpui::test]
 958    async fn test_header_only_table() {
 959        let markdown = "\
 960| Header 1 | Header 2 |
 961|----------|----------|
 962
 963Some other content
 964";
 965
 966        let expected_table = table(
 967            0..48,
 968            row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
 969            vec![],
 970        );
 971
 972        assert_eq!(
 973            parse(markdown).await.children[0],
 974            ParsedMarkdownElement::Table(expected_table)
 975        );
 976    }
 977
 978    #[gpui::test]
 979    async fn test_basic_table() {
 980        let markdown = "\
 981| Header 1 | Header 2 |
 982|----------|----------|
 983| Cell 1   | Cell 2   |
 984| Cell 3   | Cell 4   |";
 985
 986        let expected_table = table(
 987            0..95,
 988            row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
 989            vec![
 990                row(vec![text("Cell 1", 49..59), text("Cell 2", 60..70)]),
 991                row(vec![text("Cell 3", 73..83), text("Cell 4", 84..94)]),
 992            ],
 993        );
 994
 995        assert_eq!(
 996            parse(markdown).await.children[0],
 997            ParsedMarkdownElement::Table(expected_table)
 998        );
 999    }
1000
1001    #[gpui::test]
1002    async fn test_list_basic() {
1003        let parsed = parse(
1004            "\
1005* Item 1
1006* Item 2
1007* Item 3
1008",
1009        )
1010        .await;
1011
1012        assert_eq!(
1013            parsed.children,
1014            vec![
1015                list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1016                list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1017                list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
1018            ],
1019        );
1020    }
1021
1022    #[gpui::test]
1023    async fn test_list_with_tasks() {
1024        let parsed = parse(
1025            "\
1026- [ ] TODO
1027- [x] Checked
1028",
1029        )
1030        .await;
1031
1032        assert_eq!(
1033            parsed.children,
1034            vec![
1035                list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1036                list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
1037            ],
1038        );
1039    }
1040
1041    #[gpui::test]
1042    async fn test_list_with_indented_task() {
1043        let parsed = parse(
1044            "\
1045- [ ] TODO
1046  - [x] Checked
1047  - Unordered
1048  1. Number 1
1049  1. Number 2
10501. Number A
1051",
1052        )
1053        .await;
1054
1055        assert_eq!(
1056            parsed.children,
1057            vec![
1058                list_item(0..12, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1059                list_item(13..26, 2, Task(true, 15..18), vec![p("Checked", 19..26)]),
1060                list_item(29..40, 2, Unordered, vec![p("Unordered", 31..40)]),
1061                list_item(43..54, 2, Ordered(1), vec![p("Number 1", 46..54)]),
1062                list_item(57..68, 2, Ordered(2), vec![p("Number 2", 60..68)]),
1063                list_item(69..80, 1, Ordered(1), vec![p("Number A", 72..80)]),
1064            ],
1065        );
1066    }
1067
1068    #[gpui::test]
1069    async fn test_list_with_linebreak_is_handled_correctly() {
1070        let parsed = parse(
1071            "\
1072- [ ] Task 1
1073
1074- [x] Task 2
1075",
1076        )
1077        .await;
1078
1079        assert_eq!(
1080            parsed.children,
1081            vec![
1082                list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
1083                list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
1084            ],
1085        );
1086    }
1087
1088    #[gpui::test]
1089    async fn test_list_nested() {
1090        let parsed = parse(
1091            "\
1092* Item 1
1093* Item 2
1094* Item 3
1095
10961. Hello
10971. Two
1098   1. Three
10992. Four
11003. Five
1101
1102* First
1103  1. Hello
1104     1. Goodbyte
1105        - Inner
1106        - Inner
1107  2. Goodbyte
1108        - Next item empty
1109        -
1110* Last
1111",
1112        )
1113        .await;
1114
1115        assert_eq!(
1116            parsed.children,
1117            vec![
1118                list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1119                list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1120                list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
1121                list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
1122                list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
1123                list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
1124                list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
1125                list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
1126                list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
1127                list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
1128                list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
1129                list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
1130                list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
1131                list_item(143..159, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
1132                list_item(160..180, 3, Unordered, vec![p("Next item empty", 165..180)]),
1133                list_item(186..190, 3, Unordered, vec![]),
1134                list_item(191..197, 1, Unordered, vec![p("Last", 193..197)]),
1135            ]
1136        );
1137    }
1138
1139    #[gpui::test]
1140    async fn test_list_with_nested_content() {
1141        let parsed = parse(
1142            "\
1143*   This is a list item with two paragraphs.
1144
1145    This is the second paragraph in the list item.
1146",
1147        )
1148        .await;
1149
1150        assert_eq!(
1151            parsed.children,
1152            vec![list_item(
1153                0..96,
1154                1,
1155                Unordered,
1156                vec![
1157                    p("This is a list item with two paragraphs.", 4..44),
1158                    p("This is the second paragraph in the list item.", 50..97)
1159                ],
1160            ),],
1161        );
1162    }
1163
1164    #[gpui::test]
1165    async fn test_list_item_with_inline_html() {
1166        let parsed = parse(
1167            "\
1168*   This is a list item with an inline HTML <sometag>tag</sometag>.
1169",
1170        )
1171        .await;
1172
1173        assert_eq!(
1174            parsed.children,
1175            vec![list_item(
1176                0..67,
1177                1,
1178                Unordered,
1179                vec![p("This is a list item with an inline HTML tag.", 4..44),],
1180            ),],
1181        );
1182    }
1183
1184    #[gpui::test]
1185    async fn test_nested_list_with_paragraph_inside() {
1186        let parsed = parse(
1187            "\
11881. a
1189    1. b
1190        1. c
1191
1192    text
1193
1194    1. d
1195",
1196        )
1197        .await;
1198
1199        assert_eq!(
1200            parsed.children,
1201            vec![
1202                list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
1203                list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
1204                list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
1205                p("text", 32..37),
1206                list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
1207            ],
1208        );
1209    }
1210
1211    #[gpui::test]
1212    async fn test_list_with_leading_text() {
1213        let parsed = parse(
1214            "\
1215* `code`
1216* **bold**
1217* [link](https://example.com)
1218",
1219        )
1220        .await;
1221
1222        assert_eq!(
1223            parsed.children,
1224            vec![
1225                list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
1226                list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
1227                list_item(20..49, 1, Unordered, vec![p("link", 22..49)],),
1228            ],
1229        );
1230    }
1231
1232    #[gpui::test]
1233    async fn test_simple_block_quote() {
1234        let parsed = parse("> Simple block quote with **styled text**").await;
1235
1236        assert_eq!(
1237            parsed.children,
1238            vec![block_quote(
1239                vec![p("Simple block quote with styled text", 2..41)],
1240                0..41
1241            )]
1242        );
1243    }
1244
1245    #[gpui::test]
1246    async fn test_simple_block_quote_with_multiple_lines() {
1247        let parsed = parse(
1248            "\
1249> # Heading
1250> More
1251> text
1252>
1253> More text
1254",
1255        )
1256        .await;
1257
1258        assert_eq!(
1259            parsed.children,
1260            vec![block_quote(
1261                vec![
1262                    h1(text("Heading", 4..11), 2..12),
1263                    p("More text", 14..26),
1264                    p("More text", 30..40)
1265                ],
1266                0..40
1267            )]
1268        );
1269    }
1270
1271    #[gpui::test]
1272    async fn test_nested_block_quote() {
1273        let parsed = parse(
1274            "\
1275> A
1276>
1277> > # B
1278>
1279> C
1280
1281More text
1282",
1283        )
1284        .await;
1285
1286        assert_eq!(
1287            parsed.children,
1288            vec![
1289                block_quote(
1290                    vec![
1291                        p("A", 2..4),
1292                        block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
1293                        p("C", 18..20)
1294                    ],
1295                    0..20
1296                ),
1297                p("More text", 21..31)
1298            ]
1299        );
1300    }
1301
1302    #[gpui::test]
1303    async fn test_code_block() {
1304        let parsed = parse(
1305            "\
1306```
1307fn main() {
1308    return 0;
1309}
1310```
1311",
1312        )
1313        .await;
1314
1315        assert_eq!(
1316            parsed.children,
1317            vec![code_block(
1318                None,
1319                "fn main() {\n    return 0;\n}",
1320                0..35,
1321                None
1322            )]
1323        );
1324    }
1325
1326    #[gpui::test]
1327    async fn test_code_block_with_language(executor: BackgroundExecutor) {
1328        let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
1329        language_registry.add(rust_lang());
1330
1331        let parsed = parse_markdown(
1332            "\
1333```rust
1334fn main() {
1335    return 0;
1336}
1337```
1338",
1339            None,
1340            Some(language_registry),
1341        )
1342        .await;
1343
1344        assert_eq!(
1345            parsed.children,
1346            vec![code_block(
1347                Some("rust".to_string()),
1348                "fn main() {\n    return 0;\n}",
1349                0..39,
1350                Some(vec![])
1351            )]
1352        );
1353    }
1354
1355    fn rust_lang() -> Arc<Language> {
1356        Arc::new(Language::new(
1357            LanguageConfig {
1358                name: "Rust".into(),
1359                matcher: LanguageMatcher {
1360                    path_suffixes: vec!["rs".into()],
1361                    ..Default::default()
1362                },
1363                collapsed_placeholder: " /* ... */ ".to_string(),
1364                ..Default::default()
1365            },
1366            Some(tree_sitter_rust::LANGUAGE.into()),
1367        ))
1368    }
1369
1370    fn h1(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1371        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1372            source_range,
1373            level: HeadingLevel::H1,
1374            contents,
1375        })
1376    }
1377
1378    fn h2(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1379        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1380            source_range,
1381            level: HeadingLevel::H2,
1382            contents,
1383        })
1384    }
1385
1386    fn h3(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1387        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1388            source_range,
1389            level: HeadingLevel::H3,
1390            contents,
1391        })
1392    }
1393
1394    fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
1395        ParsedMarkdownElement::Paragraph(text(contents, source_range))
1396    }
1397
1398    fn text(contents: &str, source_range: Range<usize>) -> MarkdownParagraph {
1399        vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1400            highlights: Vec::new(),
1401            region_ranges: Vec::new(),
1402            regions: Vec::new(),
1403            source_range,
1404            contents: contents.to_string(),
1405        })]
1406    }
1407
1408    fn block_quote(
1409        children: Vec<ParsedMarkdownElement>,
1410        source_range: Range<usize>,
1411    ) -> ParsedMarkdownElement {
1412        ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
1413            source_range,
1414            children,
1415        })
1416    }
1417
1418    fn code_block(
1419        language: Option<String>,
1420        code: &str,
1421        source_range: Range<usize>,
1422        highlights: Option<Vec<(Range<usize>, HighlightId)>>,
1423    ) -> ParsedMarkdownElement {
1424        ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
1425            source_range,
1426            language,
1427            contents: code.to_string().into(),
1428            highlights,
1429        })
1430    }
1431
1432    fn list_item(
1433        source_range: Range<usize>,
1434        depth: u16,
1435        item_type: ParsedMarkdownListItemType,
1436        content: Vec<ParsedMarkdownElement>,
1437    ) -> ParsedMarkdownElement {
1438        ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
1439            source_range,
1440            item_type,
1441            depth,
1442            content,
1443        })
1444    }
1445
1446    fn table(
1447        source_range: Range<usize>,
1448        header: ParsedMarkdownTableRow,
1449        body: Vec<ParsedMarkdownTableRow>,
1450    ) -> ParsedMarkdownTable {
1451        ParsedMarkdownTable {
1452            column_alignments: Vec::new(),
1453            source_range,
1454            header,
1455            body,
1456        }
1457    }
1458
1459    fn row(children: Vec<MarkdownParagraph>) -> ParsedMarkdownTableRow {
1460        ParsedMarkdownTableRow { children }
1461    }
1462
1463    impl PartialEq for ParsedMarkdownTable {
1464        fn eq(&self, other: &Self) -> bool {
1465            self.source_range == other.source_range
1466                && self.header == other.header
1467                && self.body == other.body
1468        }
1469    }
1470
1471    impl PartialEq for ParsedMarkdownText {
1472        fn eq(&self, other: &Self) -> bool {
1473            self.source_range == other.source_range && self.contents == other.contents
1474        }
1475    }
1476}