markdown_parser.rs

   1use crate::markdown_elements::*;
   2use async_recursion::async_recursion;
   3use collections::FxHashMap;
   4use gpui::FontWeight;
   5use language::LanguageRegistry;
   6use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
   7use std::{ops::Range, path::PathBuf, sync::Arc};
   8
   9pub async fn parse_markdown(
  10    markdown_input: &str,
  11    file_location_directory: Option<PathBuf>,
  12    language_registry: Option<Arc<LanguageRegistry>>,
  13) -> ParsedMarkdown {
  14    let mut options = Options::all();
  15    options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
  16
  17    let parser = Parser::new_ext(markdown_input, options);
  18    let parser = MarkdownParser::new(
  19        parser.into_offset_iter().collect(),
  20        file_location_directory,
  21        language_registry,
  22    );
  23    let renderer = parser.parse_document().await;
  24    ParsedMarkdown {
  25        children: renderer.parsed,
  26    }
  27}
  28
  29struct MarkdownParser<'a> {
  30    tokens: Vec<(Event<'a>, Range<usize>)>,
  31    /// The current index in the tokens array
  32    cursor: usize,
  33    /// The blocks that we have successfully parsed so far
  34    parsed: Vec<ParsedMarkdownElement>,
  35    file_location_directory: Option<PathBuf>,
  36    language_registry: Option<Arc<LanguageRegistry>>,
  37}
  38
  39struct MarkdownListItem {
  40    content: Vec<ParsedMarkdownElement>,
  41    item_type: ParsedMarkdownListItemType,
  42}
  43
  44impl Default for MarkdownListItem {
  45    fn default() -> Self {
  46        Self {
  47            content: Vec::new(),
  48            item_type: ParsedMarkdownListItemType::Unordered,
  49        }
  50    }
  51}
  52
  53impl<'a> MarkdownParser<'a> {
  54    fn new(
  55        tokens: Vec<(Event<'a>, Range<usize>)>,
  56        file_location_directory: Option<PathBuf>,
  57        language_registry: Option<Arc<LanguageRegistry>>,
  58    ) -> Self {
  59        Self {
  60            tokens,
  61            file_location_directory,
  62            language_registry,
  63            cursor: 0,
  64            parsed: vec![],
  65        }
  66    }
  67
  68    fn eof(&self) -> bool {
  69        if self.tokens.is_empty() {
  70            return true;
  71        }
  72        self.cursor >= self.tokens.len() - 1
  73    }
  74
  75    fn peek(&self, steps: usize) -> Option<&(Event, Range<usize>)> {
  76        if self.eof() || (steps + self.cursor) >= self.tokens.len() {
  77            return self.tokens.last();
  78        }
  79        return self.tokens.get(self.cursor + steps);
  80    }
  81
  82    fn previous(&self) -> Option<&(Event, Range<usize>)> {
  83        if self.cursor == 0 || self.cursor > self.tokens.len() {
  84            return None;
  85        }
  86        return self.tokens.get(self.cursor - 1);
  87    }
  88
  89    fn current(&self) -> Option<&(Event, Range<usize>)> {
  90        return self.peek(0);
  91    }
  92
  93    fn current_event(&self) -> Option<&Event> {
  94        return self.current().map(|(event, _)| event);
  95    }
  96
  97    fn is_text_like(event: &Event) -> bool {
  98        match event {
  99            Event::Text(_)
 100            // Represent an inline code block
 101            | Event::Code(_)
 102            | Event::Html(_)
 103            | Event::FootnoteReference(_)
 104            | Event::Start(Tag::Link { link_type: _, dest_url: _, title: _, id: _ })
 105            | Event::Start(Tag::Emphasis)
 106            | Event::Start(Tag::Strong)
 107            | Event::Start(Tag::Strikethrough)
 108            | Event::Start(Tag::Image { link_type: _, dest_url: _, title: _, id: _ }) => {
 109                true
 110            }
 111            _ => false,
 112        }
 113    }
 114
 115    async fn parse_document(mut self) -> Self {
 116        while !self.eof() {
 117            if let Some(block) = self.parse_block().await {
 118                self.parsed.extend(block);
 119            } else {
 120                self.cursor += 1;
 121            }
 122        }
 123        self
 124    }
 125
 126    #[async_recursion]
 127    async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
 128        let (current, source_range) = self.current().unwrap();
 129        let source_range = source_range.clone();
 130        match current {
 131            Event::Start(tag) => match tag {
 132                Tag::Paragraph => {
 133                    self.cursor += 1;
 134                    let text = self.parse_text(false, Some(source_range));
 135                    Some(vec![ParsedMarkdownElement::Paragraph(text)])
 136                }
 137                Tag::Heading {
 138                    level,
 139                    id: _,
 140                    classes: _,
 141                    attrs: _,
 142                } => {
 143                    let level = *level;
 144                    self.cursor += 1;
 145                    let heading = self.parse_heading(level);
 146                    Some(vec![ParsedMarkdownElement::Heading(heading)])
 147                }
 148                Tag::Table(alignment) => {
 149                    let alignment = alignment.clone();
 150                    self.cursor += 1;
 151                    let table = self.parse_table(alignment);
 152                    Some(vec![ParsedMarkdownElement::Table(table)])
 153                }
 154                Tag::List(order) => {
 155                    let order = *order;
 156                    self.cursor += 1;
 157                    let list = self.parse_list(order).await;
 158                    Some(list)
 159                }
 160                Tag::BlockQuote(_kind) => {
 161                    self.cursor += 1;
 162                    let block_quote = self.parse_block_quote().await;
 163                    Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
 164                }
 165                Tag::CodeBlock(kind) => {
 166                    let language = match kind {
 167                        pulldown_cmark::CodeBlockKind::Indented => None,
 168                        pulldown_cmark::CodeBlockKind::Fenced(language) => {
 169                            if language.is_empty() {
 170                                None
 171                            } else {
 172                                Some(language.to_string())
 173                            }
 174                        }
 175                    };
 176
 177                    self.cursor += 1;
 178
 179                    let code_block = self.parse_code_block(language).await;
 180                    Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
 181                }
 182                _ => None,
 183            },
 184            Event::Rule => {
 185                let source_range = source_range.clone();
 186                self.cursor += 1;
 187                Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
 188            }
 189            _ => None,
 190        }
 191    }
 192
 193    fn parse_text(
 194        &mut self,
 195        should_complete_on_soft_break: bool,
 196        source_range: Option<Range<usize>>,
 197    ) -> ParsedMarkdownText {
 198        let source_range = source_range.unwrap_or_else(|| {
 199            self.current()
 200                .map(|(_, range)| range.clone())
 201                .unwrap_or_default()
 202        });
 203
 204        let mut text = String::new();
 205        let mut bold_depth = 0;
 206        let mut italic_depth = 0;
 207        let mut strikethrough_depth = 0;
 208        let mut link: Option<Link> = None;
 209        let mut region_ranges: Vec<Range<usize>> = vec![];
 210        let mut regions: Vec<ParsedRegion> = vec![];
 211        let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
 212
 213        let mut link_urls: Vec<String> = vec![];
 214        let mut link_ranges: Vec<Range<usize>> = vec![];
 215
 216        loop {
 217            if self.eof() {
 218                break;
 219            }
 220
 221            let (current, _source_range) = self.current().unwrap();
 222            let prev_len = text.len();
 223            match current {
 224                Event::SoftBreak => {
 225                    if should_complete_on_soft_break {
 226                        break;
 227                    }
 228
 229                    // `Some text\nSome more text` should be treated as a single line.
 230                    text.push(' ');
 231                }
 232
 233                Event::HardBreak => {
 234                    text.push('\n');
 235                }
 236
 237                // We want to ignore any inline HTML tags in the text but keep
 238                // the text between them
 239                Event::InlineHtml(_) => {}
 240
 241                Event::Text(t) => {
 242                    text.push_str(t.as_ref());
 243
 244                    let mut style = MarkdownHighlightStyle::default();
 245
 246                    if bold_depth > 0 {
 247                        style.weight = FontWeight::BOLD;
 248                    }
 249
 250                    if italic_depth > 0 {
 251                        style.italic = true;
 252                    }
 253
 254                    if strikethrough_depth > 0 {
 255                        style.strikethrough = true;
 256                    }
 257
 258                    let last_run_len = if let Some(link) = link.clone() {
 259                        region_ranges.push(prev_len..text.len());
 260                        regions.push(ParsedRegion {
 261                            code: false,
 262                            link: Some(link),
 263                        });
 264                        style.underline = true;
 265                        prev_len
 266                    } else {
 267                        // Manually scan for links
 268                        let mut finder = linkify::LinkFinder::new();
 269                        finder.kinds(&[linkify::LinkKind::Url]);
 270                        let mut last_link_len = prev_len;
 271                        for link in finder.links(t) {
 272                            let start = link.start();
 273                            let end = link.end();
 274                            let range = (prev_len + start)..(prev_len + end);
 275                            link_ranges.push(range.clone());
 276                            link_urls.push(link.as_str().to_string());
 277
 278                            // If there is a style before we match a link, we have to add this to the highlighted ranges
 279                            if style != MarkdownHighlightStyle::default()
 280                                && last_link_len < link.start()
 281                            {
 282                                highlights.push((
 283                                    last_link_len..link.start(),
 284                                    MarkdownHighlight::Style(style.clone()),
 285                                ));
 286                            }
 287
 288                            highlights.push((
 289                                range.clone(),
 290                                MarkdownHighlight::Style(MarkdownHighlightStyle {
 291                                    underline: true,
 292                                    ..style
 293                                }),
 294                            ));
 295                            region_ranges.push(range.clone());
 296                            regions.push(ParsedRegion {
 297                                code: false,
 298                                link: Some(Link::Web {
 299                                    url: link.as_str().to_string(),
 300                                }),
 301                            });
 302
 303                            last_link_len = end;
 304                        }
 305                        last_link_len
 306                    };
 307
 308                    if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
 309                        let mut new_highlight = true;
 310                        if let Some((last_range, last_style)) = highlights.last_mut() {
 311                            if last_range.end == last_run_len
 312                                && last_style == &MarkdownHighlight::Style(style.clone())
 313                            {
 314                                last_range.end = text.len();
 315                                new_highlight = false;
 316                            }
 317                        }
 318                        if new_highlight {
 319                            highlights
 320                                .push((last_run_len..text.len(), MarkdownHighlight::Style(style)));
 321                        }
 322                    }
 323                }
 324
 325                // Note: This event means "inline code" and not "code block"
 326                Event::Code(t) => {
 327                    text.push_str(t.as_ref());
 328                    region_ranges.push(prev_len..text.len());
 329
 330                    if link.is_some() {
 331                        highlights.push((
 332                            prev_len..text.len(),
 333                            MarkdownHighlight::Style(MarkdownHighlightStyle {
 334                                underline: true,
 335                                ..Default::default()
 336                            }),
 337                        ));
 338                    }
 339
 340                    regions.push(ParsedRegion {
 341                        code: true,
 342                        link: link.clone(),
 343                    });
 344                }
 345
 346                Event::Start(tag) => match tag {
 347                    Tag::Emphasis => italic_depth += 1,
 348                    Tag::Strong => bold_depth += 1,
 349                    Tag::Strikethrough => strikethrough_depth += 1,
 350                    Tag::Link {
 351                        link_type: _,
 352                        dest_url,
 353                        title: _,
 354                        id: _,
 355                    } => {
 356                        link = Link::identify(
 357                            self.file_location_directory.clone(),
 358                            dest_url.to_string(),
 359                        );
 360                    }
 361                    _ => {
 362                        break;
 363                    }
 364                },
 365
 366                Event::End(tag) => match tag {
 367                    TagEnd::Emphasis => {
 368                        italic_depth -= 1;
 369                    }
 370                    TagEnd::Strong => {
 371                        bold_depth -= 1;
 372                    }
 373                    TagEnd::Strikethrough => {
 374                        strikethrough_depth -= 1;
 375                    }
 376                    TagEnd::Link => {
 377                        link = None;
 378                    }
 379                    TagEnd::Paragraph => {
 380                        self.cursor += 1;
 381                        break;
 382                    }
 383                    _ => {
 384                        break;
 385                    }
 386                },
 387
 388                _ => {
 389                    break;
 390                }
 391            }
 392
 393            self.cursor += 1;
 394        }
 395
 396        ParsedMarkdownText {
 397            source_range,
 398            contents: text,
 399            highlights,
 400            regions,
 401            region_ranges,
 402        }
 403    }
 404
 405    fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
 406        let (_event, source_range) = self.previous().unwrap();
 407        let source_range = source_range.clone();
 408        let text = self.parse_text(true, None);
 409
 410        // Advance past the heading end tag
 411        self.cursor += 1;
 412
 413        ParsedMarkdownHeading {
 414            source_range: source_range.clone(),
 415            level: match level {
 416                pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
 417                pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
 418                pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
 419                pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
 420                pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
 421                pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
 422            },
 423            contents: text,
 424        }
 425    }
 426
 427    fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
 428        let (_event, source_range) = self.previous().unwrap();
 429        let source_range = source_range.clone();
 430        let mut header = ParsedMarkdownTableRow::new();
 431        let mut body = vec![];
 432        let mut current_row = vec![];
 433        let mut in_header = true;
 434        let column_alignments = alignment.iter().map(Self::convert_alignment).collect();
 435
 436        loop {
 437            if self.eof() {
 438                break;
 439            }
 440
 441            let (current, source_range) = self.current().unwrap();
 442            let source_range = source_range.clone();
 443            match current {
 444                Event::Start(Tag::TableHead)
 445                | Event::Start(Tag::TableRow)
 446                | Event::End(TagEnd::TableCell) => {
 447                    self.cursor += 1;
 448                }
 449                Event::Start(Tag::TableCell) => {
 450                    self.cursor += 1;
 451                    let cell_contents = self.parse_text(false, Some(source_range));
 452                    current_row.push(cell_contents);
 453                }
 454                Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
 455                    self.cursor += 1;
 456                    let new_row = std::mem::take(&mut current_row);
 457                    if in_header {
 458                        header.children = new_row;
 459                        in_header = false;
 460                    } else {
 461                        let row = ParsedMarkdownTableRow::with_children(new_row);
 462                        body.push(row);
 463                    }
 464                }
 465                Event::End(TagEnd::Table) => {
 466                    self.cursor += 1;
 467                    break;
 468                }
 469                _ => {
 470                    break;
 471                }
 472            }
 473        }
 474
 475        ParsedMarkdownTable {
 476            source_range,
 477            header,
 478            body,
 479            column_alignments,
 480        }
 481    }
 482
 483    fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
 484        match alignment {
 485            Alignment::None => ParsedMarkdownTableAlignment::None,
 486            Alignment::Left => ParsedMarkdownTableAlignment::Left,
 487            Alignment::Center => ParsedMarkdownTableAlignment::Center,
 488            Alignment::Right => ParsedMarkdownTableAlignment::Right,
 489        }
 490    }
 491
 492    async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
 493        let (_, list_source_range) = self.previous().unwrap();
 494
 495        let mut items = Vec::new();
 496        let mut items_stack = vec![MarkdownListItem::default()];
 497        let mut depth = 1;
 498        let mut order = order;
 499        let mut order_stack = Vec::new();
 500
 501        let mut insertion_indices = FxHashMap::default();
 502        let mut source_ranges = FxHashMap::default();
 503        let mut start_item_range = list_source_range.clone();
 504
 505        while !self.eof() {
 506            let (current, source_range) = self.current().unwrap();
 507            match current {
 508                Event::Start(Tag::List(new_order)) => {
 509                    if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
 510                        insertion_indices.insert(depth, items.len());
 511                    }
 512
 513                    // We will use the start of the nested list as the end for the current item's range,
 514                    // because we don't care about the hierarchy of list items
 515                    if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
 516                        e.insert(start_item_range.start..source_range.start);
 517                    }
 518
 519                    order_stack.push(order);
 520                    order = *new_order;
 521                    self.cursor += 1;
 522                    depth += 1;
 523                }
 524                Event::End(TagEnd::List(_)) => {
 525                    order = order_stack.pop().flatten();
 526                    self.cursor += 1;
 527                    depth -= 1;
 528
 529                    if depth == 0 {
 530                        break;
 531                    }
 532                }
 533                Event::Start(Tag::Item) => {
 534                    start_item_range = source_range.clone();
 535
 536                    self.cursor += 1;
 537                    items_stack.push(MarkdownListItem::default());
 538
 539                    let mut task_list = None;
 540                    // Check for task list marker (`- [ ]` or `- [x]`)
 541                    if let Some(event) = self.current_event() {
 542                        // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
 543                        if event == &Event::Start(Tag::Paragraph) {
 544                            self.cursor += 1;
 545                        }
 546
 547                        if let Some((Event::TaskListMarker(checked), range)) = self.current() {
 548                            task_list = Some((*checked, range.clone()));
 549                            self.cursor += 1;
 550                        }
 551                    }
 552
 553                    if let Some((event, range)) = self.current() {
 554                        // This is a plain list item.
 555                        // For example `- some text` or `1. [Docs](./docs.md)`
 556                        if MarkdownParser::is_text_like(event) {
 557                            let text = self.parse_text(false, Some(range.clone()));
 558                            let block = ParsedMarkdownElement::Paragraph(text);
 559                            if let Some(content) = items_stack.last_mut() {
 560                                let item_type = if let Some((checked, range)) = task_list {
 561                                    ParsedMarkdownListItemType::Task(checked, range)
 562                                } else if let Some(order) = order {
 563                                    ParsedMarkdownListItemType::Ordered(order)
 564                                } else {
 565                                    ParsedMarkdownListItemType::Unordered
 566                                };
 567                                content.item_type = item_type;
 568                                content.content.push(block);
 569                            }
 570                        } else {
 571                            let block = self.parse_block().await;
 572                            if let Some(block) = block {
 573                                if let Some(list_item) = items_stack.last_mut() {
 574                                    list_item.content.extend(block);
 575                                }
 576                            }
 577                        }
 578                    }
 579
 580                    // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
 581                    if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
 582                        self.cursor += 1;
 583                    }
 584                }
 585                Event::End(TagEnd::Item) => {
 586                    self.cursor += 1;
 587
 588                    if let Some(current) = order {
 589                        order = Some(current + 1);
 590                    }
 591
 592                    if let Some(list_item) = items_stack.pop() {
 593                        let source_range = source_ranges
 594                            .remove(&depth)
 595                            .unwrap_or(start_item_range.clone());
 596
 597                        // We need to remove the last character of the source range, because it includes the newline character
 598                        let source_range = source_range.start..source_range.end - 1;
 599                        let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
 600                            source_range,
 601                            content: list_item.content,
 602                            depth,
 603                            item_type: list_item.item_type,
 604                        });
 605
 606                        if let Some(index) = insertion_indices.get(&depth) {
 607                            items.insert(*index, item);
 608                            insertion_indices.remove(&depth);
 609                        } else {
 610                            items.push(item);
 611                        }
 612                    }
 613                }
 614                _ => {
 615                    if depth == 0 {
 616                        break;
 617                    }
 618                    // This can only happen if a list item starts with more then one paragraph,
 619                    // or the list item contains blocks that should be rendered after the nested list items
 620                    let block = self.parse_block().await;
 621                    if let Some(block) = block {
 622                        if let Some(list_item) = items_stack.last_mut() {
 623                            // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
 624                            if !insertion_indices.contains_key(&depth) {
 625                                list_item.content.extend(block);
 626                                continue;
 627                            }
 628                        }
 629
 630                        // Otherwise we need to insert the block after all the nested items
 631                        // that have been parsed so far
 632                        items.extend(block);
 633                    } else {
 634                        self.cursor += 1;
 635                    }
 636                }
 637            }
 638        }
 639
 640        items
 641    }
 642
 643    #[async_recursion]
 644    async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
 645        let (_event, source_range) = self.previous().unwrap();
 646        let source_range = source_range.clone();
 647        let mut nested_depth = 1;
 648
 649        let mut children: Vec<ParsedMarkdownElement> = vec![];
 650
 651        while !self.eof() {
 652            let block = self.parse_block().await;
 653
 654            if let Some(block) = block {
 655                children.extend(block);
 656            } else {
 657                break;
 658            }
 659
 660            if self.eof() {
 661                break;
 662            }
 663
 664            let (current, _source_range) = self.current().unwrap();
 665            match current {
 666                // This is a nested block quote.
 667                // Record that we're in a nested block quote and continue parsing.
 668                // We don't need to advance the cursor since the next
 669                // call to `parse_block` will handle it.
 670                Event::Start(Tag::BlockQuote(_kind)) => {
 671                    nested_depth += 1;
 672                }
 673                Event::End(TagEnd::BlockQuote(_kind)) => {
 674                    nested_depth -= 1;
 675                    if nested_depth == 0 {
 676                        self.cursor += 1;
 677                        break;
 678                    }
 679                }
 680                _ => {}
 681            };
 682        }
 683
 684        ParsedMarkdownBlockQuote {
 685            source_range,
 686            children,
 687        }
 688    }
 689
 690    async fn parse_code_block(&mut self, language: Option<String>) -> ParsedMarkdownCodeBlock {
 691        let (_event, source_range) = self.previous().unwrap();
 692        let source_range = source_range.clone();
 693        let mut code = String::new();
 694
 695        while !self.eof() {
 696            let (current, _source_range) = self.current().unwrap();
 697            match current {
 698                Event::Text(text) => {
 699                    code.push_str(text);
 700                    self.cursor += 1;
 701                }
 702                Event::End(TagEnd::CodeBlock) => {
 703                    self.cursor += 1;
 704                    break;
 705                }
 706                _ => {
 707                    break;
 708                }
 709            }
 710        }
 711
 712        let highlights = if let Some(language) = &language {
 713            if let Some(registry) = &self.language_registry {
 714                let rope: language::Rope = code.as_str().into();
 715                registry
 716                    .language_for_name_or_extension(language)
 717                    .await
 718                    .map(|l| l.highlight_text(&rope, 0..code.len()))
 719                    .ok()
 720            } else {
 721                None
 722            }
 723        } else {
 724            None
 725        };
 726
 727        ParsedMarkdownCodeBlock {
 728            source_range,
 729            contents: code.trim().to_string().into(),
 730            language,
 731            highlights,
 732        }
 733    }
 734}
 735
 736#[cfg(test)]
 737mod tests {
 738    use super::*;
 739
 740    use gpui::BackgroundExecutor;
 741    use language::{tree_sitter_rust, HighlightId, Language, LanguageConfig, LanguageMatcher};
 742    use pretty_assertions::assert_eq;
 743    use ParsedMarkdownListItemType::*;
 744
 745    async fn parse(input: &str) -> ParsedMarkdown {
 746        parse_markdown(input, None, None).await
 747    }
 748
 749    #[gpui::test]
 750    async fn test_headings() {
 751        let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
 752
 753        assert_eq!(
 754            parsed.children,
 755            vec![
 756                h1(text("Heading one", 2..13), 0..14),
 757                h2(text("Heading two", 17..28), 14..29),
 758                h3(text("Heading three", 33..46), 29..46),
 759            ]
 760        );
 761    }
 762
 763    #[gpui::test]
 764    async fn test_newlines_dont_new_paragraphs() {
 765        let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
 766
 767        assert_eq!(
 768            parsed.children,
 769            vec![p("Some text that is bolded and italicized", 0..46)]
 770        );
 771    }
 772
 773    #[gpui::test]
 774    async fn test_heading_with_paragraph() {
 775        let parsed = parse("# Zed\nThe editor").await;
 776
 777        assert_eq!(
 778            parsed.children,
 779            vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
 780        );
 781    }
 782
 783    #[gpui::test]
 784    async fn test_double_newlines_do_new_paragraphs() {
 785        let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
 786
 787        assert_eq!(
 788            parsed.children,
 789            vec![
 790                p("Some text that is bolded", 0..29),
 791                p("and italicized", 31..47),
 792            ]
 793        );
 794    }
 795
 796    #[gpui::test]
 797    async fn test_bold_italic_text() {
 798        let parsed = parse("Some text **that is bolded** and *italicized*").await;
 799
 800        assert_eq!(
 801            parsed.children,
 802            vec![p("Some text that is bolded and italicized", 0..45)]
 803        );
 804    }
 805
 806    #[gpui::test]
 807    async fn test_nested_bold_strikethrough_text() {
 808        let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
 809
 810        assert_eq!(parsed.children.len(), 1);
 811        assert_eq!(
 812            parsed.children[0],
 813            ParsedMarkdownElement::Paragraph(ParsedMarkdownText {
 814                source_range: 0..35,
 815                contents: "Some bostrikethroughld text".to_string(),
 816                highlights: Vec::new(),
 817                region_ranges: Vec::new(),
 818                regions: Vec::new(),
 819            })
 820        );
 821
 822        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 823            text
 824        } else {
 825            panic!("Expected a paragraph");
 826        };
 827        assert_eq!(
 828            paragraph.highlights,
 829            vec![
 830                (
 831                    5..7,
 832                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 833                        weight: FontWeight::BOLD,
 834                        ..Default::default()
 835                    }),
 836                ),
 837                (
 838                    7..20,
 839                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 840                        weight: FontWeight::BOLD,
 841                        strikethrough: true,
 842                        ..Default::default()
 843                    }),
 844                ),
 845                (
 846                    20..22,
 847                    MarkdownHighlight::Style(MarkdownHighlightStyle {
 848                        weight: FontWeight::BOLD,
 849                        ..Default::default()
 850                    }),
 851                ),
 852            ]
 853        );
 854    }
 855
 856    #[gpui::test]
 857    async fn test_text_with_inline_html() {
 858        let parsed = parse("This is a paragraph with an inline HTML <sometag>tag</sometag>.").await;
 859
 860        assert_eq!(
 861            parsed.children,
 862            vec![p("This is a paragraph with an inline HTML tag.", 0..63),],
 863        );
 864    }
 865
 866    #[gpui::test]
 867    async fn test_raw_links_detection() {
 868        let parsed = parse("Checkout this https://zed.dev link").await;
 869
 870        assert_eq!(
 871            parsed.children,
 872            vec![p("Checkout this https://zed.dev link", 0..34)]
 873        );
 874
 875        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
 876            text
 877        } else {
 878            panic!("Expected a paragraph");
 879        };
 880        assert_eq!(
 881            paragraph.highlights,
 882            vec![(
 883                14..29,
 884                MarkdownHighlight::Style(MarkdownHighlightStyle {
 885                    underline: true,
 886                    ..Default::default()
 887                }),
 888            )]
 889        );
 890        assert_eq!(
 891            paragraph.regions,
 892            vec![ParsedRegion {
 893                code: false,
 894                link: Some(Link::Web {
 895                    url: "https://zed.dev".to_string()
 896                }),
 897            }]
 898        );
 899        assert_eq!(paragraph.region_ranges, vec![14..29]);
 900    }
 901
 902    #[gpui::test]
 903    async fn test_header_only_table() {
 904        let markdown = "\
 905| Header 1 | Header 2 |
 906|----------|----------|
 907
 908Some other content
 909";
 910
 911        let expected_table = table(
 912            0..48,
 913            row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
 914            vec![],
 915        );
 916
 917        assert_eq!(
 918            parse(markdown).await.children[0],
 919            ParsedMarkdownElement::Table(expected_table)
 920        );
 921    }
 922
 923    #[gpui::test]
 924    async fn test_basic_table() {
 925        let markdown = "\
 926| Header 1 | Header 2 |
 927|----------|----------|
 928| Cell 1   | Cell 2   |
 929| Cell 3   | Cell 4   |";
 930
 931        let expected_table = table(
 932            0..95,
 933            row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
 934            vec![
 935                row(vec![text("Cell 1", 49..59), text("Cell 2", 60..70)]),
 936                row(vec![text("Cell 3", 73..83), text("Cell 4", 84..94)]),
 937            ],
 938        );
 939
 940        assert_eq!(
 941            parse(markdown).await.children[0],
 942            ParsedMarkdownElement::Table(expected_table)
 943        );
 944    }
 945
 946    #[gpui::test]
 947    async fn test_list_basic() {
 948        let parsed = parse(
 949            "\
 950* Item 1
 951* Item 2
 952* Item 3
 953",
 954        )
 955        .await;
 956
 957        assert_eq!(
 958            parsed.children,
 959            vec![
 960                list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
 961                list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
 962                list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
 963            ],
 964        );
 965    }
 966
 967    #[gpui::test]
 968    async fn test_list_with_tasks() {
 969        let parsed = parse(
 970            "\
 971- [ ] TODO
 972- [x] Checked
 973",
 974        )
 975        .await;
 976
 977        assert_eq!(
 978            parsed.children,
 979            vec![
 980                list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
 981                list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
 982            ],
 983        );
 984    }
 985
 986    #[gpui::test]
 987    async fn test_list_with_indented_task() {
 988        let parsed = parse(
 989            "\
 990- [ ] TODO
 991  - [x] Checked
 992  - Unordered
 993  1. Number 1
 994  1. Number 2
 9951. Number A
 996",
 997        )
 998        .await;
 999
1000        assert_eq!(
1001            parsed.children,
1002            vec![
1003                list_item(0..12, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1004                list_item(13..26, 2, Task(true, 15..18), vec![p("Checked", 19..26)]),
1005                list_item(29..40, 2, Unordered, vec![p("Unordered", 31..40)]),
1006                list_item(43..54, 2, Ordered(1), vec![p("Number 1", 46..54)]),
1007                list_item(57..68, 2, Ordered(2), vec![p("Number 2", 60..68)]),
1008                list_item(69..80, 1, Ordered(1), vec![p("Number A", 72..80)]),
1009            ],
1010        );
1011    }
1012
1013    #[gpui::test]
1014    async fn test_list_with_linebreak_is_handled_correctly() {
1015        let parsed = parse(
1016            "\
1017- [ ] Task 1
1018
1019- [x] Task 2
1020",
1021        )
1022        .await;
1023
1024        assert_eq!(
1025            parsed.children,
1026            vec![
1027                list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
1028                list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
1029            ],
1030        );
1031    }
1032
1033    #[gpui::test]
1034    async fn test_list_nested() {
1035        let parsed = parse(
1036            "\
1037* Item 1
1038* Item 2
1039* Item 3
1040
10411. Hello
10421. Two
1043   1. Three
10442. Four
10453. Five
1046
1047* First
1048  1. Hello
1049     1. Goodbyte
1050        - Inner
1051        - Inner
1052  2. Goodbyte
1053        - Next item empty
1054        -
1055* Last
1056",
1057        )
1058        .await;
1059
1060        assert_eq!(
1061            parsed.children,
1062            vec![
1063                list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1064                list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1065                list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
1066                list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
1067                list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
1068                list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
1069                list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
1070                list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
1071                list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
1072                list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
1073                list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
1074                list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
1075                list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
1076                list_item(143..159, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
1077                list_item(160..180, 3, Unordered, vec![p("Next item empty", 165..180)]),
1078                list_item(186..190, 3, Unordered, vec![]),
1079                list_item(191..197, 1, Unordered, vec![p("Last", 193..197)]),
1080            ]
1081        );
1082    }
1083
1084    #[gpui::test]
1085    async fn test_list_with_nested_content() {
1086        let parsed = parse(
1087            "\
1088*   This is a list item with two paragraphs.
1089
1090    This is the second paragraph in the list item.
1091",
1092        )
1093        .await;
1094
1095        assert_eq!(
1096            parsed.children,
1097            vec![list_item(
1098                0..96,
1099                1,
1100                Unordered,
1101                vec![
1102                    p("This is a list item with two paragraphs.", 4..44),
1103                    p("This is the second paragraph in the list item.", 50..97)
1104                ],
1105            ),],
1106        );
1107    }
1108
1109    #[gpui::test]
1110    async fn test_list_item_with_inline_html() {
1111        let parsed = parse(
1112            "\
1113*   This is a list item with an inline HTML <sometag>tag</sometag>.
1114",
1115        )
1116        .await;
1117
1118        assert_eq!(
1119            parsed.children,
1120            vec![list_item(
1121                0..67,
1122                1,
1123                Unordered,
1124                vec![p("This is a list item with an inline HTML tag.", 4..44),],
1125            ),],
1126        );
1127    }
1128
1129    #[gpui::test]
1130    async fn test_nested_list_with_paragraph_inside() {
1131        let parsed = parse(
1132            "\
11331. a
1134    1. b
1135        1. c
1136
1137    text
1138
1139    1. d
1140",
1141        )
1142        .await;
1143
1144        assert_eq!(
1145            parsed.children,
1146            vec![
1147                list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
1148                list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
1149                list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
1150                p("text", 32..37),
1151                list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
1152            ],
1153        );
1154    }
1155
1156    #[gpui::test]
1157    async fn test_list_with_leading_text() {
1158        let parsed = parse(
1159            "\
1160* `code`
1161* **bold**
1162* [link](https://example.com)
1163",
1164        )
1165        .await;
1166
1167        assert_eq!(
1168            parsed.children,
1169            vec![
1170                list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
1171                list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
1172                list_item(20..49, 1, Unordered, vec![p("link", 22..49)],)
1173            ],
1174        );
1175    }
1176
1177    #[gpui::test]
1178    async fn test_simple_block_quote() {
1179        let parsed = parse("> Simple block quote with **styled text**").await;
1180
1181        assert_eq!(
1182            parsed.children,
1183            vec![block_quote(
1184                vec![p("Simple block quote with styled text", 2..41)],
1185                0..41
1186            )]
1187        );
1188    }
1189
1190    #[gpui::test]
1191    async fn test_simple_block_quote_with_multiple_lines() {
1192        let parsed = parse(
1193            "\
1194> # Heading
1195> More
1196> text
1197>
1198> More text
1199",
1200        )
1201        .await;
1202
1203        assert_eq!(
1204            parsed.children,
1205            vec![block_quote(
1206                vec![
1207                    h1(text("Heading", 4..11), 2..12),
1208                    p("More text", 14..26),
1209                    p("More text", 30..40)
1210                ],
1211                0..40
1212            )]
1213        );
1214    }
1215
1216    #[gpui::test]
1217    async fn test_nested_block_quote() {
1218        let parsed = parse(
1219            "\
1220> A
1221>
1222> > # B
1223>
1224> C
1225
1226More text
1227",
1228        )
1229        .await;
1230
1231        assert_eq!(
1232            parsed.children,
1233            vec![
1234                block_quote(
1235                    vec![
1236                        p("A", 2..4),
1237                        block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
1238                        p("C", 18..20)
1239                    ],
1240                    0..20
1241                ),
1242                p("More text", 21..31)
1243            ]
1244        );
1245    }
1246
1247    #[gpui::test]
1248    async fn test_code_block() {
1249        let parsed = parse(
1250            "\
1251```
1252fn main() {
1253    return 0;
1254}
1255```
1256",
1257        )
1258        .await;
1259
1260        assert_eq!(
1261            parsed.children,
1262            vec![code_block(
1263                None,
1264                "fn main() {\n    return 0;\n}",
1265                0..35,
1266                None
1267            )]
1268        );
1269    }
1270
1271    #[gpui::test]
1272    async fn test_code_block_with_language(executor: BackgroundExecutor) {
1273        let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
1274        language_registry.add(rust_lang());
1275
1276        let parsed = parse_markdown(
1277            "\
1278```rust
1279fn main() {
1280    return 0;
1281}
1282```
1283",
1284            None,
1285            Some(language_registry),
1286        )
1287        .await;
1288
1289        assert_eq!(
1290            parsed.children,
1291            vec![code_block(
1292                Some("rust".to_string()),
1293                "fn main() {\n    return 0;\n}",
1294                0..39,
1295                Some(vec![])
1296            )]
1297        );
1298    }
1299
1300    fn rust_lang() -> Arc<Language> {
1301        Arc::new(Language::new(
1302            LanguageConfig {
1303                name: "Rust".into(),
1304                matcher: LanguageMatcher {
1305                    path_suffixes: vec!["rs".into()],
1306                    ..Default::default()
1307                },
1308                collapsed_placeholder: " /* ... */ ".to_string(),
1309                ..Default::default()
1310            },
1311            Some(tree_sitter_rust::LANGUAGE.into()),
1312        ))
1313    }
1314
1315    fn h1(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1316        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1317            source_range,
1318            level: HeadingLevel::H1,
1319            contents,
1320        })
1321    }
1322
1323    fn h2(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1324        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1325            source_range,
1326            level: HeadingLevel::H2,
1327            contents,
1328        })
1329    }
1330
1331    fn h3(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1332        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1333            source_range,
1334            level: HeadingLevel::H3,
1335            contents,
1336        })
1337    }
1338
1339    fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
1340        ParsedMarkdownElement::Paragraph(text(contents, source_range))
1341    }
1342
1343    fn text(contents: &str, source_range: Range<usize>) -> ParsedMarkdownText {
1344        ParsedMarkdownText {
1345            highlights: Vec::new(),
1346            region_ranges: Vec::new(),
1347            regions: Vec::new(),
1348            source_range,
1349            contents: contents.to_string(),
1350        }
1351    }
1352
1353    fn block_quote(
1354        children: Vec<ParsedMarkdownElement>,
1355        source_range: Range<usize>,
1356    ) -> ParsedMarkdownElement {
1357        ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
1358            source_range,
1359            children,
1360        })
1361    }
1362
1363    fn code_block(
1364        language: Option<String>,
1365        code: &str,
1366        source_range: Range<usize>,
1367        highlights: Option<Vec<(Range<usize>, HighlightId)>>,
1368    ) -> ParsedMarkdownElement {
1369        ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
1370            source_range,
1371            language,
1372            contents: code.to_string().into(),
1373            highlights,
1374        })
1375    }
1376
1377    fn list_item(
1378        source_range: Range<usize>,
1379        depth: u16,
1380        item_type: ParsedMarkdownListItemType,
1381        content: Vec<ParsedMarkdownElement>,
1382    ) -> ParsedMarkdownElement {
1383        ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
1384            source_range,
1385            item_type,
1386            depth,
1387            content,
1388        })
1389    }
1390
1391    fn table(
1392        source_range: Range<usize>,
1393        header: ParsedMarkdownTableRow,
1394        body: Vec<ParsedMarkdownTableRow>,
1395    ) -> ParsedMarkdownTable {
1396        ParsedMarkdownTable {
1397            column_alignments: Vec::new(),
1398            source_range,
1399            header,
1400            body,
1401        }
1402    }
1403
1404    fn row(children: Vec<ParsedMarkdownText>) -> ParsedMarkdownTableRow {
1405        ParsedMarkdownTableRow { children }
1406    }
1407
1408    impl PartialEq for ParsedMarkdownTable {
1409        fn eq(&self, other: &Self) -> bool {
1410            self.source_range == other.source_range
1411                && self.header == other.header
1412                && self.body == other.body
1413        }
1414    }
1415
1416    impl PartialEq for ParsedMarkdownText {
1417        fn eq(&self, other: &Self) -> bool {
1418            self.source_range == other.source_range && self.contents == other.contents
1419        }
1420    }
1421}