markdown_parser.rs

   1use crate::markdown_elements::*;
   2use async_recursion::async_recursion;
   3use collections::FxHashMap;
   4use gpui::{DefiniteLength, FontWeight, px, relative};
   5use html5ever::{ParseOpts, local_name, parse_document, tendril::TendrilSink};
   6use language::LanguageRegistry;
   7use markup5ever_rcdom::RcDom;
   8use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
   9use std::{cell::RefCell, collections::HashMap, ops::Range, path::PathBuf, rc::Rc, sync::Arc, vec};
  10
  11pub async fn parse_markdown(
  12    markdown_input: &str,
  13    file_location_directory: Option<PathBuf>,
  14    language_registry: Option<Arc<LanguageRegistry>>,
  15) -> ParsedMarkdown {
  16    let mut options = Options::all();
  17    options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
  18
  19    let parser = Parser::new_ext(markdown_input, options);
  20    let parser = MarkdownParser::new(
  21        parser.into_offset_iter().collect(),
  22        file_location_directory,
  23        language_registry,
  24    );
  25    let renderer = parser.parse_document().await;
  26    ParsedMarkdown {
  27        children: renderer.parsed,
  28    }
  29}
  30
  31struct MarkdownParser<'a> {
  32    tokens: Vec<(Event<'a>, Range<usize>)>,
  33    /// The current index in the tokens array
  34    cursor: usize,
  35    /// The blocks that we have successfully parsed so far
  36    parsed: Vec<ParsedMarkdownElement>,
  37    file_location_directory: Option<PathBuf>,
  38    language_registry: Option<Arc<LanguageRegistry>>,
  39}
  40
  41struct MarkdownListItem {
  42    content: Vec<ParsedMarkdownElement>,
  43    item_type: ParsedMarkdownListItemType,
  44}
  45
  46impl Default for MarkdownListItem {
  47    fn default() -> Self {
  48        Self {
  49            content: Vec::new(),
  50            item_type: ParsedMarkdownListItemType::Unordered,
  51        }
  52    }
  53}
  54
  55impl<'a> MarkdownParser<'a> {
  56    fn new(
  57        tokens: Vec<(Event<'a>, Range<usize>)>,
  58        file_location_directory: Option<PathBuf>,
  59        language_registry: Option<Arc<LanguageRegistry>>,
  60    ) -> Self {
  61        Self {
  62            tokens,
  63            file_location_directory,
  64            language_registry,
  65            cursor: 0,
  66            parsed: vec![],
  67        }
  68    }
  69
  70    fn eof(&self) -> bool {
  71        if self.tokens.is_empty() {
  72            return true;
  73        }
  74        self.cursor >= self.tokens.len() - 1
  75    }
  76
  77    fn peek(&self, steps: usize) -> Option<&(Event<'_>, Range<usize>)> {
  78        if self.eof() || (steps + self.cursor) >= self.tokens.len() {
  79            return self.tokens.last();
  80        }
  81        self.tokens.get(self.cursor + steps)
  82    }
  83
  84    fn previous(&self) -> Option<&(Event<'_>, Range<usize>)> {
  85        if self.cursor == 0 || self.cursor > self.tokens.len() {
  86            return None;
  87        }
  88        self.tokens.get(self.cursor - 1)
  89    }
  90
  91    fn current(&self) -> Option<&(Event<'_>, Range<usize>)> {
  92        self.peek(0)
  93    }
  94
  95    fn current_event(&self) -> Option<&Event<'_>> {
  96        self.current().map(|(event, _)| event)
  97    }
  98
  99    fn is_text_like(event: &Event) -> bool {
 100        match event {
 101            Event::Text(_)
 102            // Represent an inline code block
 103            | Event::Code(_)
 104            | Event::Html(_)
 105            | Event::InlineHtml(_)
 106            | Event::FootnoteReference(_)
 107            | Event::Start(Tag::Link { .. })
 108            | Event::Start(Tag::Emphasis)
 109            | Event::Start(Tag::Strong)
 110            | Event::Start(Tag::Strikethrough)
 111            | Event::Start(Tag::Image { .. }) => {
 112                true
 113            }
 114            _ => false,
 115        }
 116    }
 117
 118    async fn parse_document(mut self) -> Self {
 119        while !self.eof() {
 120            if let Some(block) = self.parse_block().await {
 121                self.parsed.extend(block);
 122            } else {
 123                self.cursor += 1;
 124            }
 125        }
 126        self
 127    }
 128
 129    #[async_recursion]
 130    async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
 131        let (current, source_range) = self.current().unwrap();
 132        let source_range = source_range.clone();
 133        match current {
 134            Event::Start(tag) => match tag {
 135                Tag::Paragraph => {
 136                    self.cursor += 1;
 137                    let text = self.parse_text(false, Some(source_range));
 138                    Some(vec![ParsedMarkdownElement::Paragraph(text)])
 139                }
 140                Tag::Heading { level, .. } => {
 141                    let level = *level;
 142                    self.cursor += 1;
 143                    let heading = self.parse_heading(level);
 144                    Some(vec![ParsedMarkdownElement::Heading(heading)])
 145                }
 146                Tag::Table(alignment) => {
 147                    let alignment = alignment.clone();
 148                    self.cursor += 1;
 149                    let table = self.parse_table(alignment);
 150                    Some(vec![ParsedMarkdownElement::Table(table)])
 151                }
 152                Tag::List(order) => {
 153                    let order = *order;
 154                    self.cursor += 1;
 155                    let list = self.parse_list(order).await;
 156                    Some(list)
 157                }
 158                Tag::BlockQuote(_kind) => {
 159                    self.cursor += 1;
 160                    let block_quote = self.parse_block_quote().await;
 161                    Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
 162                }
 163                Tag::CodeBlock(kind) => {
 164                    let language = match kind {
 165                        pulldown_cmark::CodeBlockKind::Indented => None,
 166                        pulldown_cmark::CodeBlockKind::Fenced(language) => {
 167                            if language.is_empty() {
 168                                None
 169                            } else {
 170                                Some(language.to_string())
 171                            }
 172                        }
 173                    };
 174
 175                    self.cursor += 1;
 176
 177                    let code_block = self.parse_code_block(language).await?;
 178                    Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
 179                }
 180                Tag::HtmlBlock => {
 181                    self.cursor += 1;
 182
 183                    Some(self.parse_html_block().await)
 184                }
 185                _ => None,
 186            },
 187            Event::Rule => {
 188                self.cursor += 1;
 189                Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
 190            }
 191            _ => None,
 192        }
 193    }
 194
 195    fn parse_text(
 196        &mut self,
 197        should_complete_on_soft_break: bool,
 198        source_range: Option<Range<usize>>,
 199    ) -> MarkdownParagraph {
 200        let source_range = source_range.unwrap_or_else(|| {
 201            self.current()
 202                .map(|(_, range)| range.clone())
 203                .unwrap_or_default()
 204        });
 205
 206        let mut markdown_text_like = Vec::new();
 207        let mut text = String::new();
 208        let mut bold_depth = 0;
 209        let mut italic_depth = 0;
 210        let mut strikethrough_depth = 0;
 211        let mut link: Option<Link> = None;
 212        let mut image: Option<Image> = None;
 213        let mut region_ranges: Vec<Range<usize>> = vec![];
 214        let mut regions: Vec<ParsedRegion> = vec![];
 215        let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
 216        let mut link_urls: Vec<String> = vec![];
 217        let mut link_ranges: Vec<Range<usize>> = vec![];
 218
 219        loop {
 220            if self.eof() {
 221                break;
 222            }
 223
 224            let (current, _) = self.current().unwrap();
 225            let prev_len = text.len();
 226            match current {
 227                Event::SoftBreak => {
 228                    if should_complete_on_soft_break {
 229                        break;
 230                    }
 231                    text.push(' ');
 232                }
 233
 234                Event::HardBreak => {
 235                    text.push('\n');
 236                }
 237
 238                // We want to ignore any inline HTML tags in the text but keep
 239                // the text between them
 240                Event::InlineHtml(_) => {}
 241
 242                Event::Text(t) => {
 243                    text.push_str(t.as_ref());
 244                    let mut style = MarkdownHighlightStyle::default();
 245
 246                    if bold_depth > 0 {
 247                        style.weight = FontWeight::BOLD;
 248                    }
 249
 250                    if italic_depth > 0 {
 251                        style.italic = true;
 252                    }
 253
 254                    if strikethrough_depth > 0 {
 255                        style.strikethrough = true;
 256                    }
 257
 258                    let last_run_len = if let Some(link) = link.clone() {
 259                        region_ranges.push(prev_len..text.len());
 260                        regions.push(ParsedRegion {
 261                            code: false,
 262                            link: Some(link),
 263                        });
 264                        style.link = true;
 265                        prev_len
 266                    } else {
 267                        // Manually scan for links
 268                        let mut finder = linkify::LinkFinder::new();
 269                        finder.kinds(&[linkify::LinkKind::Url]);
 270                        let mut last_link_len = prev_len;
 271                        for link in finder.links(t) {
 272                            let start = link.start();
 273                            let end = link.end();
 274                            let range = (prev_len + start)..(prev_len + end);
 275                            link_ranges.push(range.clone());
 276                            link_urls.push(link.as_str().to_string());
 277
 278                            // If there is a style before we match a link, we have to add this to the highlighted ranges
 279                            if style != MarkdownHighlightStyle::default()
 280                                && last_link_len < link.start()
 281                            {
 282                                highlights.push((
 283                                    last_link_len..link.start(),
 284                                    MarkdownHighlight::Style(style.clone()),
 285                                ));
 286                            }
 287
 288                            highlights.push((
 289                                range.clone(),
 290                                MarkdownHighlight::Style(MarkdownHighlightStyle {
 291                                    underline: true,
 292                                    ..style
 293                                }),
 294                            ));
 295                            region_ranges.push(range.clone());
 296                            regions.push(ParsedRegion {
 297                                code: false,
 298                                link: Some(Link::Web {
 299                                    url: link.as_str().to_string(),
 300                                }),
 301                            });
 302                            last_link_len = end;
 303                        }
 304                        last_link_len
 305                    };
 306
 307                    if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
 308                        let mut new_highlight = true;
 309                        if let Some((last_range, last_style)) = highlights.last_mut()
 310                            && last_range.end == last_run_len
 311                            && last_style == &MarkdownHighlight::Style(style.clone())
 312                        {
 313                            last_range.end = text.len();
 314                            new_highlight = false;
 315                        }
 316                        if new_highlight {
 317                            highlights.push((
 318                                last_run_len..text.len(),
 319                                MarkdownHighlight::Style(style.clone()),
 320                            ));
 321                        }
 322                    }
 323                }
 324                Event::Code(t) => {
 325                    text.push_str(t.as_ref());
 326                    region_ranges.push(prev_len..text.len());
 327
 328                    if link.is_some() {
 329                        highlights.push((
 330                            prev_len..text.len(),
 331                            MarkdownHighlight::Style(MarkdownHighlightStyle {
 332                                link: true,
 333                                ..Default::default()
 334                            }),
 335                        ));
 336                    }
 337                    regions.push(ParsedRegion {
 338                        code: true,
 339                        link: link.clone(),
 340                    });
 341                }
 342                Event::Start(tag) => match tag {
 343                    Tag::Emphasis => italic_depth += 1,
 344                    Tag::Strong => bold_depth += 1,
 345                    Tag::Strikethrough => strikethrough_depth += 1,
 346                    Tag::Link { dest_url, .. } => {
 347                        link = Link::identify(
 348                            self.file_location_directory.clone(),
 349                            dest_url.to_string(),
 350                        );
 351                    }
 352                    Tag::Image { dest_url, .. } => {
 353                        if !text.is_empty() {
 354                            let parsed_regions = MarkdownParagraphChunk::Text(ParsedMarkdownText {
 355                                source_range: source_range.clone(),
 356                                contents: text.clone(),
 357                                highlights: highlights.clone(),
 358                                region_ranges: region_ranges.clone(),
 359                                regions: regions.clone(),
 360                            });
 361                            text = String::new();
 362                            highlights = vec![];
 363                            region_ranges = vec![];
 364                            regions = vec![];
 365                            markdown_text_like.push(parsed_regions);
 366                        }
 367                        image = Image::identify(
 368                            dest_url.to_string(),
 369                            source_range.clone(),
 370                            self.file_location_directory.clone(),
 371                        );
 372                    }
 373                    _ => {
 374                        break;
 375                    }
 376                },
 377
 378                Event::End(tag) => match tag {
 379                    TagEnd::Emphasis => italic_depth -= 1,
 380                    TagEnd::Strong => bold_depth -= 1,
 381                    TagEnd::Strikethrough => strikethrough_depth -= 1,
 382                    TagEnd::Link => {
 383                        link = None;
 384                    }
 385                    TagEnd::Image => {
 386                        if let Some(mut image) = image.take() {
 387                            if !text.is_empty() {
 388                                image.set_alt_text(std::mem::take(&mut text).into());
 389                            }
 390                            markdown_text_like.push(MarkdownParagraphChunk::Image(image));
 391                        }
 392                    }
 393                    TagEnd::Paragraph => {
 394                        self.cursor += 1;
 395                        break;
 396                    }
 397                    _ => {
 398                        break;
 399                    }
 400                },
 401                _ => {
 402                    break;
 403                }
 404            }
 405
 406            self.cursor += 1;
 407        }
 408        if !text.is_empty() {
 409            markdown_text_like.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
 410                source_range,
 411                contents: text,
 412                highlights,
 413                regions,
 414                region_ranges,
 415            }));
 416        }
 417        markdown_text_like
 418    }
 419
 420    fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
 421        let (_event, source_range) = self.previous().unwrap();
 422        let source_range = source_range.clone();
 423        let text = self.parse_text(true, None);
 424
 425        // Advance past the heading end tag
 426        self.cursor += 1;
 427
 428        ParsedMarkdownHeading {
 429            source_range,
 430            level: match level {
 431                pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
 432                pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
 433                pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
 434                pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
 435                pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
 436                pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
 437            },
 438            contents: text,
 439        }
 440    }
 441
 442    fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
 443        let (_event, source_range) = self.previous().unwrap();
 444        let source_range = source_range.clone();
 445        let mut header = ParsedMarkdownTableRow::new();
 446        let mut body = vec![];
 447        let mut current_row = vec![];
 448        let mut in_header = true;
 449        let column_alignments = alignment.iter().map(Self::convert_alignment).collect();
 450
 451        loop {
 452            if self.eof() {
 453                break;
 454            }
 455
 456            let (current, source_range) = self.current().unwrap();
 457            let source_range = source_range.clone();
 458            match current {
 459                Event::Start(Tag::TableHead)
 460                | Event::Start(Tag::TableRow)
 461                | Event::End(TagEnd::TableCell) => {
 462                    self.cursor += 1;
 463                }
 464                Event::Start(Tag::TableCell) => {
 465                    self.cursor += 1;
 466                    let cell_contents = self.parse_text(false, Some(source_range));
 467                    current_row.push(cell_contents);
 468                }
 469                Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
 470                    self.cursor += 1;
 471                    let new_row = std::mem::take(&mut current_row);
 472                    if in_header {
 473                        header.children = new_row;
 474                        in_header = false;
 475                    } else {
 476                        let row = ParsedMarkdownTableRow::with_children(new_row);
 477                        body.push(row);
 478                    }
 479                }
 480                Event::End(TagEnd::Table) => {
 481                    self.cursor += 1;
 482                    break;
 483                }
 484                _ => {
 485                    break;
 486                }
 487            }
 488        }
 489
 490        ParsedMarkdownTable {
 491            source_range,
 492            header,
 493            body,
 494            column_alignments,
 495        }
 496    }
 497
 498    fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
 499        match alignment {
 500            Alignment::None => ParsedMarkdownTableAlignment::None,
 501            Alignment::Left => ParsedMarkdownTableAlignment::Left,
 502            Alignment::Center => ParsedMarkdownTableAlignment::Center,
 503            Alignment::Right => ParsedMarkdownTableAlignment::Right,
 504        }
 505    }
 506
 507    async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
 508        let (_, list_source_range) = self.previous().unwrap();
 509
 510        let mut items = Vec::new();
 511        let mut items_stack = vec![MarkdownListItem::default()];
 512        let mut depth = 1;
 513        let mut order = order;
 514        let mut order_stack = Vec::new();
 515
 516        let mut insertion_indices = FxHashMap::default();
 517        let mut source_ranges = FxHashMap::default();
 518        let mut start_item_range = list_source_range.clone();
 519
 520        while !self.eof() {
 521            let (current, source_range) = self.current().unwrap();
 522            match current {
 523                Event::Start(Tag::List(new_order)) => {
 524                    if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
 525                        insertion_indices.insert(depth, items.len());
 526                    }
 527
 528                    // We will use the start of the nested list as the end for the current item's range,
 529                    // because we don't care about the hierarchy of list items
 530                    if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
 531                        e.insert(start_item_range.start..source_range.start);
 532                    }
 533
 534                    order_stack.push(order);
 535                    order = *new_order;
 536                    self.cursor += 1;
 537                    depth += 1;
 538                }
 539                Event::End(TagEnd::List(_)) => {
 540                    order = order_stack.pop().flatten();
 541                    self.cursor += 1;
 542                    depth -= 1;
 543
 544                    if depth == 0 {
 545                        break;
 546                    }
 547                }
 548                Event::Start(Tag::Item) => {
 549                    start_item_range = source_range.clone();
 550
 551                    self.cursor += 1;
 552                    items_stack.push(MarkdownListItem::default());
 553
 554                    let mut task_list = None;
 555                    // Check for task list marker (`- [ ]` or `- [x]`)
 556                    if let Some(event) = self.current_event() {
 557                        // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
 558                        if event == &Event::Start(Tag::Paragraph) {
 559                            self.cursor += 1;
 560                        }
 561
 562                        if let Some((Event::TaskListMarker(checked), range)) = self.current() {
 563                            task_list = Some((*checked, range.clone()));
 564                            self.cursor += 1;
 565                        }
 566                    }
 567
 568                    if let Some((event, range)) = self.current() {
 569                        // This is a plain list item.
 570                        // For example `- some text` or `1. [Docs](./docs.md)`
 571                        if MarkdownParser::is_text_like(event) {
 572                            let text = self.parse_text(false, Some(range.clone()));
 573                            let block = ParsedMarkdownElement::Paragraph(text);
 574                            if let Some(content) = items_stack.last_mut() {
 575                                let item_type = if let Some((checked, range)) = task_list {
 576                                    ParsedMarkdownListItemType::Task(checked, range)
 577                                } else if let Some(order) = order {
 578                                    ParsedMarkdownListItemType::Ordered(order)
 579                                } else {
 580                                    ParsedMarkdownListItemType::Unordered
 581                                };
 582                                content.item_type = item_type;
 583                                content.content.push(block);
 584                            }
 585                        } else {
 586                            let block = self.parse_block().await;
 587                            if let Some(block) = block
 588                                && let Some(list_item) = items_stack.last_mut()
 589                            {
 590                                list_item.content.extend(block);
 591                            }
 592                        }
 593                    }
 594
 595                    // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
 596                    if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
 597                        self.cursor += 1;
 598                    }
 599                }
 600                Event::End(TagEnd::Item) => {
 601                    self.cursor += 1;
 602
 603                    if let Some(current) = order {
 604                        order = Some(current + 1);
 605                    }
 606
 607                    if let Some(list_item) = items_stack.pop() {
 608                        let source_range = source_ranges
 609                            .remove(&depth)
 610                            .unwrap_or(start_item_range.clone());
 611
 612                        // We need to remove the last character of the source range, because it includes the newline character
 613                        let source_range = source_range.start..source_range.end - 1;
 614                        let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
 615                            source_range,
 616                            content: list_item.content,
 617                            depth,
 618                            item_type: list_item.item_type,
 619                        });
 620
 621                        if let Some(index) = insertion_indices.get(&depth) {
 622                            items.insert(*index, item);
 623                            insertion_indices.remove(&depth);
 624                        } else {
 625                            items.push(item);
 626                        }
 627                    }
 628                }
 629                _ => {
 630                    if depth == 0 {
 631                        break;
 632                    }
 633                    // This can only happen if a list item starts with more then one paragraph,
 634                    // or the list item contains blocks that should be rendered after the nested list items
 635                    let block = self.parse_block().await;
 636                    if let Some(block) = block {
 637                        if let Some(list_item) = items_stack.last_mut() {
 638                            // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
 639                            if !insertion_indices.contains_key(&depth) {
 640                                list_item.content.extend(block);
 641                                continue;
 642                            }
 643                        }
 644
 645                        // Otherwise we need to insert the block after all the nested items
 646                        // that have been parsed so far
 647                        items.extend(block);
 648                    } else {
 649                        self.cursor += 1;
 650                    }
 651                }
 652            }
 653        }
 654
 655        items
 656    }
 657
 658    #[async_recursion]
 659    async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
 660        let (_event, source_range) = self.previous().unwrap();
 661        let source_range = source_range.clone();
 662        let mut nested_depth = 1;
 663
 664        let mut children: Vec<ParsedMarkdownElement> = vec![];
 665
 666        while !self.eof() {
 667            let block = self.parse_block().await;
 668
 669            if let Some(block) = block {
 670                children.extend(block);
 671            } else {
 672                break;
 673            }
 674
 675            if self.eof() {
 676                break;
 677            }
 678
 679            let (current, _source_range) = self.current().unwrap();
 680            match current {
 681                // This is a nested block quote.
 682                // Record that we're in a nested block quote and continue parsing.
 683                // We don't need to advance the cursor since the next
 684                // call to `parse_block` will handle it.
 685                Event::Start(Tag::BlockQuote(_kind)) => {
 686                    nested_depth += 1;
 687                }
 688                Event::End(TagEnd::BlockQuote(_kind)) => {
 689                    nested_depth -= 1;
 690                    if nested_depth == 0 {
 691                        self.cursor += 1;
 692                        break;
 693                    }
 694                }
 695                _ => {}
 696            };
 697        }
 698
 699        ParsedMarkdownBlockQuote {
 700            source_range,
 701            children,
 702        }
 703    }
 704
 705    async fn parse_code_block(
 706        &mut self,
 707        language: Option<String>,
 708    ) -> Option<ParsedMarkdownCodeBlock> {
 709        let Some((_event, source_range)) = self.previous() else {
 710            return None;
 711        };
 712
 713        let source_range = source_range.clone();
 714        let mut code = String::new();
 715
 716        while !self.eof() {
 717            let Some((current, _source_range)) = self.current() else {
 718                break;
 719            };
 720
 721            match current {
 722                Event::Text(text) => {
 723                    code.push_str(text);
 724                    self.cursor += 1;
 725                }
 726                Event::End(TagEnd::CodeBlock) => {
 727                    self.cursor += 1;
 728                    break;
 729                }
 730                _ => {
 731                    break;
 732                }
 733            }
 734        }
 735
 736        code = code.strip_suffix('\n').unwrap_or(&code).to_string();
 737
 738        let highlights = if let Some(language) = &language {
 739            if let Some(registry) = &self.language_registry {
 740                let rope: language::Rope = code.as_str().into();
 741                registry
 742                    .language_for_name_or_extension(language)
 743                    .await
 744                    .map(|l| l.highlight_text(&rope, 0..code.len()))
 745                    .ok()
 746            } else {
 747                None
 748            }
 749        } else {
 750            None
 751        };
 752
 753        Some(ParsedMarkdownCodeBlock {
 754            source_range,
 755            contents: code.into(),
 756            language,
 757            highlights,
 758        })
 759    }
 760
 761    async fn parse_html_block(&mut self) -> Vec<ParsedMarkdownElement> {
 762        let mut elements = Vec::new();
 763        let Some((_event, _source_range)) = self.previous() else {
 764            return elements;
 765        };
 766
 767        while !self.eof() {
 768            let Some((current, source_range)) = self.current() else {
 769                break;
 770            };
 771            let source_range = source_range.clone();
 772            match current {
 773                Event::Html(html) => {
 774                    let mut cursor = std::io::Cursor::new(html.as_bytes());
 775                    let Some(dom) = parse_document(RcDom::default(), ParseOpts::default())
 776                        .from_utf8()
 777                        .read_from(&mut cursor)
 778                        .ok()
 779                    else {
 780                        self.cursor += 1;
 781                        continue;
 782                    };
 783
 784                    self.cursor += 1;
 785
 786                    self.parse_html_node(source_range, &dom.document, &mut elements);
 787                }
 788                Event::End(TagEnd::CodeBlock) => {
 789                    self.cursor += 1;
 790                    break;
 791                }
 792                _ => {
 793                    break;
 794                }
 795            }
 796        }
 797
 798        elements
 799    }
 800
 801    fn parse_html_node(
 802        &self,
 803        source_range: Range<usize>,
 804        node: &Rc<markup5ever_rcdom::Node>,
 805        elements: &mut Vec<ParsedMarkdownElement>,
 806    ) {
 807        match &node.data {
 808            markup5ever_rcdom::NodeData::Document => {
 809                self.consume_children(source_range, node, elements);
 810            }
 811            markup5ever_rcdom::NodeData::Doctype { .. } => {}
 812            markup5ever_rcdom::NodeData::Text { contents } => {
 813                elements.push(ParsedMarkdownElement::Paragraph(vec![
 814                    MarkdownParagraphChunk::Text(ParsedMarkdownText {
 815                        source_range,
 816                        contents: contents.borrow().to_string(),
 817                        highlights: Vec::default(),
 818                        region_ranges: Vec::default(),
 819                        regions: Vec::default(),
 820                    }),
 821                ]));
 822            }
 823            markup5ever_rcdom::NodeData::Comment { .. } => {}
 824            markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
 825                if local_name!("img") == name.local {
 826                    if let Some(image) = self.extract_image(source_range, attrs) {
 827                        elements.push(ParsedMarkdownElement::Image(image));
 828                    }
 829                } else if matches!(
 830                    name.local,
 831                    local_name!("h1")
 832                        | local_name!("h2")
 833                        | local_name!("h3")
 834                        | local_name!("h4")
 835                        | local_name!("h5")
 836                        | local_name!("h6")
 837                ) {
 838                    let mut paragraph = MarkdownParagraph::new();
 839                    self.consume_paragraph(source_range.clone(), node, &mut paragraph);
 840
 841                    if !paragraph.is_empty() {
 842                        elements.push(ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
 843                            source_range,
 844                            level: match name.local {
 845                                local_name!("h1") => HeadingLevel::H1,
 846                                local_name!("h2") => HeadingLevel::H2,
 847                                local_name!("h3") => HeadingLevel::H3,
 848                                local_name!("h4") => HeadingLevel::H4,
 849                                local_name!("h5") => HeadingLevel::H5,
 850                                local_name!("h6") => HeadingLevel::H6,
 851                                _ => unreachable!(),
 852                            },
 853                            contents: paragraph,
 854                        }));
 855                    }
 856                } else {
 857                    self.consume_children(source_range, node, elements);
 858                }
 859            }
 860            markup5ever_rcdom::NodeData::ProcessingInstruction { .. } => {}
 861        }
 862    }
 863
 864    fn parse_paragraph(
 865        &self,
 866        source_range: Range<usize>,
 867        node: &Rc<markup5ever_rcdom::Node>,
 868        paragraph: &mut MarkdownParagraph,
 869    ) {
 870        match &node.data {
 871            markup5ever_rcdom::NodeData::Text { contents } => {
 872                paragraph.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
 873                    source_range,
 874                    regions: Vec::default(),
 875                    contents: contents.borrow().to_string(),
 876                    region_ranges: Vec::default(),
 877                    highlights: Vec::default(),
 878                }));
 879            }
 880            markup5ever_rcdom::NodeData::Element { .. } => {
 881                self.consume_paragraph(source_range, node, paragraph);
 882            }
 883            _ => {}
 884        }
 885    }
 886
 887    fn consume_paragraph(
 888        &self,
 889        source_range: Range<usize>,
 890        node: &Rc<markup5ever_rcdom::Node>,
 891        paragraph: &mut MarkdownParagraph,
 892    ) {
 893        for node in node.children.borrow().iter() {
 894            self.parse_paragraph(source_range.clone(), node, paragraph);
 895        }
 896    }
 897
 898    fn consume_children(
 899        &self,
 900        source_range: Range<usize>,
 901        node: &Rc<markup5ever_rcdom::Node>,
 902        elements: &mut Vec<ParsedMarkdownElement>,
 903    ) {
 904        for node in node.children.borrow().iter() {
 905            self.parse_html_node(source_range.clone(), node, elements);
 906        }
 907    }
 908
 909    fn attr_value(
 910        attrs: &RefCell<Vec<html5ever::Attribute>>,
 911        name: html5ever::LocalName,
 912    ) -> Option<String> {
 913        attrs.borrow().iter().find_map(|attr| {
 914            if attr.name.local == name {
 915                Some(attr.value.to_string())
 916            } else {
 917                None
 918            }
 919        })
 920    }
 921
 922    fn extract_styles_from_attributes(
 923        attrs: &RefCell<Vec<html5ever::Attribute>>,
 924    ) -> HashMap<String, String> {
 925        let mut styles = HashMap::new();
 926
 927        if let Some(style) = Self::attr_value(attrs, local_name!("style")) {
 928            for decl in style.split(';') {
 929                let mut parts = decl.splitn(2, ':');
 930                if let Some((key, value)) = parts.next().zip(parts.next()) {
 931                    styles.insert(
 932                        key.trim().to_lowercase().to_string(),
 933                        value.trim().to_string(),
 934                    );
 935                }
 936            }
 937        }
 938
 939        styles
 940    }
 941
 942    fn extract_image(
 943        &self,
 944        source_range: Range<usize>,
 945        attrs: &RefCell<Vec<html5ever::Attribute>>,
 946    ) -> Option<Image> {
 947        let src = Self::attr_value(attrs, local_name!("src"))?;
 948
 949        let mut image = Image::identify(src, source_range, self.file_location_directory.clone())?;
 950
 951        if let Some(alt) = Self::attr_value(attrs, local_name!("alt")) {
 952            image.set_alt_text(alt.into());
 953        }
 954
 955        let styles = Self::extract_styles_from_attributes(attrs);
 956
 957        if let Some(width) = Self::attr_value(attrs, local_name!("width"))
 958            .or_else(|| styles.get("width").cloned())
 959            .and_then(|width| Self::parse_length(&width))
 960        {
 961            image.set_width(width);
 962        }
 963
 964        if let Some(height) = Self::attr_value(attrs, local_name!("height"))
 965            .or_else(|| styles.get("height").cloned())
 966            .and_then(|height| Self::parse_length(&height))
 967        {
 968            image.set_height(height);
 969        }
 970
 971        Some(image)
 972    }
 973
 974    /// Parses the width/height attribute value of an html element (e.g. img element)
 975    fn parse_length(value: &str) -> Option<DefiniteLength> {
 976        if value.ends_with("%") {
 977            value
 978                .trim_end_matches("%")
 979                .parse::<f32>()
 980                .ok()
 981                .map(|value| relative(value / 100.))
 982        } else {
 983            value
 984                .trim_end_matches("px")
 985                .parse()
 986                .ok()
 987                .map(|value| px(value).into())
 988        }
 989    }
 990}
 991
 992#[cfg(test)]
 993mod tests {
 994    use super::*;
 995    use ParsedMarkdownListItemType::*;
 996    use core::panic;
 997    use gpui::{AbsoluteLength, BackgroundExecutor, DefiniteLength};
 998    use language::{
 999        HighlightId, Language, LanguageConfig, LanguageMatcher, LanguageRegistry, tree_sitter_rust,
1000    };
1001    use pretty_assertions::assert_eq;
1002
1003    async fn parse(input: &str) -> ParsedMarkdown {
1004        parse_markdown(input, None, None).await
1005    }
1006
1007    #[gpui::test]
1008    async fn test_headings() {
1009        let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
1010
1011        assert_eq!(
1012            parsed.children,
1013            vec![
1014                h1(text("Heading one", 2..13), 0..14),
1015                h2(text("Heading two", 17..28), 14..29),
1016                h3(text("Heading three", 33..46), 29..46),
1017            ]
1018        );
1019    }
1020
1021    #[gpui::test]
1022    async fn test_newlines_dont_new_paragraphs() {
1023        let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
1024
1025        assert_eq!(
1026            parsed.children,
1027            vec![p("Some text that is bolded and italicized", 0..46)]
1028        );
1029    }
1030
1031    #[gpui::test]
1032    async fn test_heading_with_paragraph() {
1033        let parsed = parse("# Zed\nThe editor").await;
1034
1035        assert_eq!(
1036            parsed.children,
1037            vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
1038        );
1039    }
1040
1041    #[gpui::test]
1042    async fn test_double_newlines_do_new_paragraphs() {
1043        let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
1044
1045        assert_eq!(
1046            parsed.children,
1047            vec![
1048                p("Some text that is bolded", 0..29),
1049                p("and italicized", 31..47),
1050            ]
1051        );
1052    }
1053
1054    #[gpui::test]
1055    async fn test_bold_italic_text() {
1056        let parsed = parse("Some text **that is bolded** and *italicized*").await;
1057
1058        assert_eq!(
1059            parsed.children,
1060            vec![p("Some text that is bolded and italicized", 0..45)]
1061        );
1062    }
1063
1064    #[gpui::test]
1065    async fn test_nested_bold_strikethrough_text() {
1066        let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
1067
1068        assert_eq!(parsed.children.len(), 1);
1069        assert_eq!(
1070            parsed.children[0],
1071            ParsedMarkdownElement::Paragraph(vec![MarkdownParagraphChunk::Text(
1072                ParsedMarkdownText {
1073                    source_range: 0..35,
1074                    contents: "Some bostrikethroughld text".to_string(),
1075                    highlights: Vec::new(),
1076                    region_ranges: Vec::new(),
1077                    regions: Vec::new(),
1078                }
1079            )])
1080        );
1081
1082        let new_text = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1083            text
1084        } else {
1085            panic!("Expected a paragraph");
1086        };
1087
1088        let paragraph = if let MarkdownParagraphChunk::Text(text) = &new_text[0] {
1089            text
1090        } else {
1091            panic!("Expected a text");
1092        };
1093
1094        assert_eq!(
1095            paragraph.highlights,
1096            vec![
1097                (
1098                    5..7,
1099                    MarkdownHighlight::Style(MarkdownHighlightStyle {
1100                        weight: FontWeight::BOLD,
1101                        ..Default::default()
1102                    }),
1103                ),
1104                (
1105                    7..20,
1106                    MarkdownHighlight::Style(MarkdownHighlightStyle {
1107                        weight: FontWeight::BOLD,
1108                        strikethrough: true,
1109                        ..Default::default()
1110                    }),
1111                ),
1112                (
1113                    20..22,
1114                    MarkdownHighlight::Style(MarkdownHighlightStyle {
1115                        weight: FontWeight::BOLD,
1116                        ..Default::default()
1117                    }),
1118                ),
1119            ]
1120        );
1121    }
1122
1123    #[gpui::test]
1124    async fn test_text_with_inline_html() {
1125        let parsed = parse("This is a paragraph with an inline HTML <sometag>tag</sometag>.").await;
1126
1127        assert_eq!(
1128            parsed.children,
1129            vec![p("This is a paragraph with an inline HTML tag.", 0..63),],
1130        );
1131    }
1132
1133    #[gpui::test]
1134    async fn test_raw_links_detection() {
1135        let parsed = parse("Checkout this https://zed.dev link").await;
1136
1137        assert_eq!(
1138            parsed.children,
1139            vec![p("Checkout this https://zed.dev link", 0..34)]
1140        );
1141    }
1142
1143    #[gpui::test]
1144    async fn test_empty_image() {
1145        let parsed = parse("![]()").await;
1146
1147        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1148            text
1149        } else {
1150            panic!("Expected a paragraph");
1151        };
1152        assert_eq!(paragraph.len(), 0);
1153    }
1154
1155    #[gpui::test]
1156    async fn test_image_links_detection() {
1157        let parsed = parse("![test](https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png)").await;
1158
1159        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1160            text
1161        } else {
1162            panic!("Expected a paragraph");
1163        };
1164        assert_eq!(
1165            paragraph[0],
1166            MarkdownParagraphChunk::Image(Image {
1167                source_range: 0..111,
1168                link: Link::Web {
1169                    url: "https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png".to_string(),
1170                },
1171                alt_text: Some("test".into()),
1172                height: None,
1173                width: None,
1174            },)
1175        );
1176    }
1177
1178    #[gpui::test]
1179    async fn test_image_without_alt_text() {
1180        let parsed = parse("![](http://example.com/foo.png)").await;
1181
1182        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1183            text
1184        } else {
1185            panic!("Expected a paragraph");
1186        };
1187        assert_eq!(
1188            paragraph[0],
1189            MarkdownParagraphChunk::Image(Image {
1190                source_range: 0..31,
1191                link: Link::Web {
1192                    url: "http://example.com/foo.png".to_string(),
1193                },
1194                alt_text: None,
1195                height: None,
1196                width: None,
1197            },)
1198        );
1199    }
1200
1201    #[gpui::test]
1202    async fn test_image_with_alt_text_containing_formatting() {
1203        let parsed = parse("![foo *bar* baz](http://example.com/foo.png)").await;
1204
1205        let ParsedMarkdownElement::Paragraph(chunks) = &parsed.children[0] else {
1206            panic!("Expected a paragraph");
1207        };
1208        assert_eq!(
1209            chunks,
1210            &[MarkdownParagraphChunk::Image(Image {
1211                source_range: 0..44,
1212                link: Link::Web {
1213                    url: "http://example.com/foo.png".to_string(),
1214                },
1215                alt_text: Some("foo bar baz".into()),
1216                height: None,
1217                width: None,
1218            }),],
1219        );
1220    }
1221
1222    #[gpui::test]
1223    async fn test_images_with_text_in_between() {
1224        let parsed = parse(
1225            "![foo](http://example.com/foo.png)\nLorem Ipsum\n![bar](http://example.com/bar.png)",
1226        )
1227        .await;
1228
1229        let chunks = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1230            text
1231        } else {
1232            panic!("Expected a paragraph");
1233        };
1234        assert_eq!(
1235            chunks,
1236            &vec![
1237                MarkdownParagraphChunk::Image(Image {
1238                    source_range: 0..81,
1239                    link: Link::Web {
1240                        url: "http://example.com/foo.png".to_string(),
1241                    },
1242                    alt_text: Some("foo".into()),
1243                    height: None,
1244                    width: None,
1245                }),
1246                MarkdownParagraphChunk::Text(ParsedMarkdownText {
1247                    source_range: 0..81,
1248                    contents: " Lorem Ipsum ".to_string(),
1249                    highlights: Vec::new(),
1250                    region_ranges: Vec::new(),
1251                    regions: Vec::new(),
1252                }),
1253                MarkdownParagraphChunk::Image(Image {
1254                    source_range: 0..81,
1255                    link: Link::Web {
1256                        url: "http://example.com/bar.png".to_string(),
1257                    },
1258                    alt_text: Some("bar".into()),
1259                    height: None,
1260                    width: None,
1261                })
1262            ]
1263        );
1264    }
1265
1266    #[test]
1267    fn test_parse_length() {
1268        // Test percentage values
1269        assert_eq!(
1270            MarkdownParser::parse_length("50%"),
1271            Some(DefiniteLength::Fraction(0.5))
1272        );
1273        assert_eq!(
1274            MarkdownParser::parse_length("100%"),
1275            Some(DefiniteLength::Fraction(1.0))
1276        );
1277        assert_eq!(
1278            MarkdownParser::parse_length("25%"),
1279            Some(DefiniteLength::Fraction(0.25))
1280        );
1281        assert_eq!(
1282            MarkdownParser::parse_length("0%"),
1283            Some(DefiniteLength::Fraction(0.0))
1284        );
1285
1286        // Test pixel values
1287        assert_eq!(
1288            MarkdownParser::parse_length("100px"),
1289            Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.0))))
1290        );
1291        assert_eq!(
1292            MarkdownParser::parse_length("50px"),
1293            Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(50.0))))
1294        );
1295        assert_eq!(
1296            MarkdownParser::parse_length("0px"),
1297            Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(0.0))))
1298        );
1299
1300        // Test values without units (should be treated as pixels)
1301        assert_eq!(
1302            MarkdownParser::parse_length("100"),
1303            Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.0))))
1304        );
1305        assert_eq!(
1306            MarkdownParser::parse_length("42"),
1307            Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(42.0))))
1308        );
1309
1310        // Test invalid values
1311        assert_eq!(MarkdownParser::parse_length("invalid"), None);
1312        assert_eq!(MarkdownParser::parse_length("px"), None);
1313        assert_eq!(MarkdownParser::parse_length("%"), None);
1314        assert_eq!(MarkdownParser::parse_length(""), None);
1315        assert_eq!(MarkdownParser::parse_length("abc%"), None);
1316        assert_eq!(MarkdownParser::parse_length("abcpx"), None);
1317
1318        // Test decimal values
1319        assert_eq!(
1320            MarkdownParser::parse_length("50.5%"),
1321            Some(DefiniteLength::Fraction(0.505))
1322        );
1323        assert_eq!(
1324            MarkdownParser::parse_length("100.25px"),
1325            Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.25))))
1326        );
1327        assert_eq!(
1328            MarkdownParser::parse_length("42.0"),
1329            Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(42.0))))
1330        );
1331    }
1332
1333    #[gpui::test]
1334    async fn test_html_heading_tags() {
1335        let parsed = parse("<h1>Heading</h1><h2>Heading</h2><h3>Heading</h3><h4>Heading</h4><h5>Heading</h5><h6>Heading</h6>").await;
1336
1337        assert_eq!(
1338            ParsedMarkdown {
1339                children: vec![
1340                    ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1341                        level: HeadingLevel::H1,
1342                        source_range: 0..96,
1343                        contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1344                            source_range: 0..96,
1345                            contents: "Heading".into(),
1346                            highlights: Vec::default(),
1347                            region_ranges: Vec::default(),
1348                            regions: Vec::default()
1349                        })],
1350                    }),
1351                    ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1352                        level: HeadingLevel::H2,
1353                        source_range: 0..96,
1354                        contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1355                            source_range: 0..96,
1356                            contents: "Heading".into(),
1357                            highlights: Vec::default(),
1358                            region_ranges: Vec::default(),
1359                            regions: Vec::default()
1360                        })],
1361                    }),
1362                    ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1363                        level: HeadingLevel::H3,
1364                        source_range: 0..96,
1365                        contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1366                            source_range: 0..96,
1367                            contents: "Heading".into(),
1368                            highlights: Vec::default(),
1369                            region_ranges: Vec::default(),
1370                            regions: Vec::default()
1371                        })],
1372                    }),
1373                    ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1374                        level: HeadingLevel::H4,
1375                        source_range: 0..96,
1376                        contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1377                            source_range: 0..96,
1378                            contents: "Heading".into(),
1379                            highlights: Vec::default(),
1380                            region_ranges: Vec::default(),
1381                            regions: Vec::default()
1382                        })],
1383                    }),
1384                    ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1385                        level: HeadingLevel::H5,
1386                        source_range: 0..96,
1387                        contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1388                            source_range: 0..96,
1389                            contents: "Heading".into(),
1390                            highlights: Vec::default(),
1391                            region_ranges: Vec::default(),
1392                            regions: Vec::default()
1393                        })],
1394                    }),
1395                    ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1396                        level: HeadingLevel::H6,
1397                        source_range: 0..96,
1398                        contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1399                            source_range: 0..96,
1400                            contents: "Heading".into(),
1401                            highlights: Vec::default(),
1402                            region_ranges: Vec::default(),
1403                            regions: Vec::default()
1404                        })],
1405                    }),
1406                ],
1407            },
1408            parsed
1409        );
1410    }
1411
1412    #[gpui::test]
1413    async fn test_html_image_tag() {
1414        let parsed = parse("<img src=\"http://example.com/foo.png\" />").await;
1415
1416        let ParsedMarkdownElement::Image(image) = &parsed.children[0] else {
1417            panic!("Expected a image element");
1418        };
1419        assert_eq!(
1420            image.clone(),
1421            Image {
1422                source_range: 0..40,
1423                link: Link::Web {
1424                    url: "http://example.com/foo.png".to_string(),
1425                },
1426                alt_text: None,
1427                height: None,
1428                width: None,
1429            },
1430        );
1431    }
1432
1433    #[gpui::test]
1434    async fn test_html_image_tag_with_alt_text() {
1435        let parsed = parse("<img src=\"http://example.com/foo.png\" alt=\"Foo\" />").await;
1436
1437        let ParsedMarkdownElement::Image(image) = &parsed.children[0] else {
1438            panic!("Expected a image element");
1439        };
1440        assert_eq!(
1441            image.clone(),
1442            Image {
1443                source_range: 0..50,
1444                link: Link::Web {
1445                    url: "http://example.com/foo.png".to_string(),
1446                },
1447                alt_text: Some("Foo".into()),
1448                height: None,
1449                width: None,
1450            },
1451        );
1452    }
1453
1454    #[gpui::test]
1455    async fn test_html_image_tag_with_height_and_width() {
1456        let parsed =
1457            parse("<img src=\"http://example.com/foo.png\" height=\"100\" width=\"200\" />").await;
1458
1459        let ParsedMarkdownElement::Image(image) = &parsed.children[0] else {
1460            panic!("Expected a image element");
1461        };
1462        assert_eq!(
1463            image.clone(),
1464            Image {
1465                source_range: 0..65,
1466                link: Link::Web {
1467                    url: "http://example.com/foo.png".to_string(),
1468                },
1469                alt_text: None,
1470                height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
1471                width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
1472            },
1473        );
1474    }
1475
1476    #[gpui::test]
1477    async fn test_html_image_style_tag_with_height_and_width() {
1478        let parsed = parse(
1479            "<img src=\"http://example.com/foo.png\" style=\"height:100px; width:200px;\" />",
1480        )
1481        .await;
1482
1483        let ParsedMarkdownElement::Image(image) = &parsed.children[0] else {
1484            panic!("Expected a image element");
1485        };
1486        assert_eq!(
1487            image.clone(),
1488            Image {
1489                source_range: 0..75,
1490                link: Link::Web {
1491                    url: "http://example.com/foo.png".to_string(),
1492                },
1493                alt_text: None,
1494                height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
1495                width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
1496            },
1497        );
1498    }
1499
1500    #[gpui::test]
1501    async fn test_header_only_table() {
1502        let markdown = "\
1503| Header 1 | Header 2 |
1504|----------|----------|
1505
1506Some other content
1507";
1508
1509        let expected_table = table(
1510            0..48,
1511            row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1512            vec![],
1513        );
1514
1515        assert_eq!(
1516            parse(markdown).await.children[0],
1517            ParsedMarkdownElement::Table(expected_table)
1518        );
1519    }
1520
1521    #[gpui::test]
1522    async fn test_basic_table() {
1523        let markdown = "\
1524| Header 1 | Header 2 |
1525|----------|----------|
1526| Cell 1   | Cell 2   |
1527| Cell 3   | Cell 4   |";
1528
1529        let expected_table = table(
1530            0..95,
1531            row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1532            vec![
1533                row(vec![text("Cell 1", 49..59), text("Cell 2", 60..70)]),
1534                row(vec![text("Cell 3", 73..83), text("Cell 4", 84..94)]),
1535            ],
1536        );
1537
1538        assert_eq!(
1539            parse(markdown).await.children[0],
1540            ParsedMarkdownElement::Table(expected_table)
1541        );
1542    }
1543
1544    #[gpui::test]
1545    async fn test_list_basic() {
1546        let parsed = parse(
1547            "\
1548* Item 1
1549* Item 2
1550* Item 3
1551",
1552        )
1553        .await;
1554
1555        assert_eq!(
1556            parsed.children,
1557            vec![
1558                list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1559                list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1560                list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
1561            ],
1562        );
1563    }
1564
1565    #[gpui::test]
1566    async fn test_list_with_tasks() {
1567        let parsed = parse(
1568            "\
1569- [ ] TODO
1570- [x] Checked
1571",
1572        )
1573        .await;
1574
1575        assert_eq!(
1576            parsed.children,
1577            vec![
1578                list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1579                list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
1580            ],
1581        );
1582    }
1583
1584    #[gpui::test]
1585    async fn test_list_with_indented_task() {
1586        let parsed = parse(
1587            "\
1588- [ ] TODO
1589  - [x] Checked
1590  - Unordered
1591  1. Number 1
1592  1. Number 2
15931. Number A
1594",
1595        )
1596        .await;
1597
1598        assert_eq!(
1599            parsed.children,
1600            vec![
1601                list_item(0..12, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1602                list_item(13..26, 2, Task(true, 15..18), vec![p("Checked", 19..26)]),
1603                list_item(29..40, 2, Unordered, vec![p("Unordered", 31..40)]),
1604                list_item(43..54, 2, Ordered(1), vec![p("Number 1", 46..54)]),
1605                list_item(57..68, 2, Ordered(2), vec![p("Number 2", 60..68)]),
1606                list_item(69..80, 1, Ordered(1), vec![p("Number A", 72..80)]),
1607            ],
1608        );
1609    }
1610
1611    #[gpui::test]
1612    async fn test_list_with_linebreak_is_handled_correctly() {
1613        let parsed = parse(
1614            "\
1615- [ ] Task 1
1616
1617- [x] Task 2
1618",
1619        )
1620        .await;
1621
1622        assert_eq!(
1623            parsed.children,
1624            vec![
1625                list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
1626                list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
1627            ],
1628        );
1629    }
1630
1631    #[gpui::test]
1632    async fn test_list_nested() {
1633        let parsed = parse(
1634            "\
1635* Item 1
1636* Item 2
1637* Item 3
1638
16391. Hello
16401. Two
1641   1. Three
16422. Four
16433. Five
1644
1645* First
1646  1. Hello
1647     1. Goodbyte
1648        - Inner
1649        - Inner
1650  2. Goodbyte
1651        - Next item empty
1652        -
1653* Last
1654",
1655        )
1656        .await;
1657
1658        assert_eq!(
1659            parsed.children,
1660            vec![
1661                list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1662                list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1663                list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
1664                list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
1665                list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
1666                list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
1667                list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
1668                list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
1669                list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
1670                list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
1671                list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
1672                list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
1673                list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
1674                list_item(143..159, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
1675                list_item(160..180, 3, Unordered, vec![p("Next item empty", 165..180)]),
1676                list_item(186..190, 3, Unordered, vec![]),
1677                list_item(191..197, 1, Unordered, vec![p("Last", 193..197)]),
1678            ]
1679        );
1680    }
1681
1682    #[gpui::test]
1683    async fn test_list_with_nested_content() {
1684        let parsed = parse(
1685            "\
1686*   This is a list item with two paragraphs.
1687
1688    This is the second paragraph in the list item.
1689",
1690        )
1691        .await;
1692
1693        assert_eq!(
1694            parsed.children,
1695            vec![list_item(
1696                0..96,
1697                1,
1698                Unordered,
1699                vec![
1700                    p("This is a list item with two paragraphs.", 4..44),
1701                    p("This is the second paragraph in the list item.", 50..97)
1702                ],
1703            ),],
1704        );
1705    }
1706
1707    #[gpui::test]
1708    async fn test_list_item_with_inline_html() {
1709        let parsed = parse(
1710            "\
1711*   This is a list item with an inline HTML <sometag>tag</sometag>.
1712",
1713        )
1714        .await;
1715
1716        assert_eq!(
1717            parsed.children,
1718            vec![list_item(
1719                0..67,
1720                1,
1721                Unordered,
1722                vec![p("This is a list item with an inline HTML tag.", 4..44),],
1723            ),],
1724        );
1725    }
1726
1727    #[gpui::test]
1728    async fn test_nested_list_with_paragraph_inside() {
1729        let parsed = parse(
1730            "\
17311. a
1732    1. b
1733        1. c
1734
1735    text
1736
1737    1. d
1738",
1739        )
1740        .await;
1741
1742        assert_eq!(
1743            parsed.children,
1744            vec![
1745                list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
1746                list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
1747                list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
1748                p("text", 32..37),
1749                list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
1750            ],
1751        );
1752    }
1753
1754    #[gpui::test]
1755    async fn test_list_with_leading_text() {
1756        let parsed = parse(
1757            "\
1758* `code`
1759* **bold**
1760* [link](https://example.com)
1761",
1762        )
1763        .await;
1764
1765        assert_eq!(
1766            parsed.children,
1767            vec![
1768                list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
1769                list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
1770                list_item(20..49, 1, Unordered, vec![p("link", 22..49)],),
1771            ],
1772        );
1773    }
1774
1775    #[gpui::test]
1776    async fn test_simple_block_quote() {
1777        let parsed = parse("> Simple block quote with **styled text**").await;
1778
1779        assert_eq!(
1780            parsed.children,
1781            vec![block_quote(
1782                vec![p("Simple block quote with styled text", 2..41)],
1783                0..41
1784            )]
1785        );
1786    }
1787
1788    #[gpui::test]
1789    async fn test_simple_block_quote_with_multiple_lines() {
1790        let parsed = parse(
1791            "\
1792> # Heading
1793> More
1794> text
1795>
1796> More text
1797",
1798        )
1799        .await;
1800
1801        assert_eq!(
1802            parsed.children,
1803            vec![block_quote(
1804                vec![
1805                    h1(text("Heading", 4..11), 2..12),
1806                    p("More text", 14..26),
1807                    p("More text", 30..40)
1808                ],
1809                0..40
1810            )]
1811        );
1812    }
1813
1814    #[gpui::test]
1815    async fn test_nested_block_quote() {
1816        let parsed = parse(
1817            "\
1818> A
1819>
1820> > # B
1821>
1822> C
1823
1824More text
1825",
1826        )
1827        .await;
1828
1829        assert_eq!(
1830            parsed.children,
1831            vec![
1832                block_quote(
1833                    vec![
1834                        p("A", 2..4),
1835                        block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
1836                        p("C", 18..20)
1837                    ],
1838                    0..20
1839                ),
1840                p("More text", 21..31)
1841            ]
1842        );
1843    }
1844
1845    #[gpui::test]
1846    async fn test_code_block() {
1847        let parsed = parse(
1848            "\
1849```
1850fn main() {
1851    return 0;
1852}
1853```
1854",
1855        )
1856        .await;
1857
1858        assert_eq!(
1859            parsed.children,
1860            vec![code_block(
1861                None,
1862                "fn main() {\n    return 0;\n}",
1863                0..35,
1864                None
1865            )]
1866        );
1867    }
1868
1869    #[gpui::test]
1870    async fn test_code_block_with_language(executor: BackgroundExecutor) {
1871        let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
1872        language_registry.add(rust_lang());
1873
1874        let parsed = parse_markdown(
1875            "\
1876```rust
1877fn main() {
1878    return 0;
1879}
1880```
1881",
1882            None,
1883            Some(language_registry),
1884        )
1885        .await;
1886
1887        assert_eq!(
1888            parsed.children,
1889            vec![code_block(
1890                Some("rust".to_string()),
1891                "fn main() {\n    return 0;\n}",
1892                0..39,
1893                Some(vec![])
1894            )]
1895        );
1896    }
1897
1898    fn rust_lang() -> Arc<Language> {
1899        Arc::new(Language::new(
1900            LanguageConfig {
1901                name: "Rust".into(),
1902                matcher: LanguageMatcher {
1903                    path_suffixes: vec!["rs".into()],
1904                    ..Default::default()
1905                },
1906                collapsed_placeholder: " /* ... */ ".to_string(),
1907                ..Default::default()
1908            },
1909            Some(tree_sitter_rust::LANGUAGE.into()),
1910        ))
1911    }
1912
1913    fn h1(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1914        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1915            source_range,
1916            level: HeadingLevel::H1,
1917            contents,
1918        })
1919    }
1920
1921    fn h2(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1922        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1923            source_range,
1924            level: HeadingLevel::H2,
1925            contents,
1926        })
1927    }
1928
1929    fn h3(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1930        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1931            source_range,
1932            level: HeadingLevel::H3,
1933            contents,
1934        })
1935    }
1936
1937    fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
1938        ParsedMarkdownElement::Paragraph(text(contents, source_range))
1939    }
1940
1941    fn text(contents: &str, source_range: Range<usize>) -> MarkdownParagraph {
1942        vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1943            highlights: Vec::new(),
1944            region_ranges: Vec::new(),
1945            regions: Vec::new(),
1946            source_range,
1947            contents: contents.to_string(),
1948        })]
1949    }
1950
1951    fn block_quote(
1952        children: Vec<ParsedMarkdownElement>,
1953        source_range: Range<usize>,
1954    ) -> ParsedMarkdownElement {
1955        ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
1956            source_range,
1957            children,
1958        })
1959    }
1960
1961    fn code_block(
1962        language: Option<String>,
1963        code: &str,
1964        source_range: Range<usize>,
1965        highlights: Option<Vec<(Range<usize>, HighlightId)>>,
1966    ) -> ParsedMarkdownElement {
1967        ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
1968            source_range,
1969            language,
1970            contents: code.to_string().into(),
1971            highlights,
1972        })
1973    }
1974
1975    fn list_item(
1976        source_range: Range<usize>,
1977        depth: u16,
1978        item_type: ParsedMarkdownListItemType,
1979        content: Vec<ParsedMarkdownElement>,
1980    ) -> ParsedMarkdownElement {
1981        ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
1982            source_range,
1983            item_type,
1984            depth,
1985            content,
1986        })
1987    }
1988
1989    fn table(
1990        source_range: Range<usize>,
1991        header: ParsedMarkdownTableRow,
1992        body: Vec<ParsedMarkdownTableRow>,
1993    ) -> ParsedMarkdownTable {
1994        ParsedMarkdownTable {
1995            column_alignments: Vec::new(),
1996            source_range,
1997            header,
1998            body,
1999        }
2000    }
2001
2002    fn row(children: Vec<MarkdownParagraph>) -> ParsedMarkdownTableRow {
2003        ParsedMarkdownTableRow { children }
2004    }
2005
2006    impl PartialEq for ParsedMarkdownTable {
2007        fn eq(&self, other: &Self) -> bool {
2008            self.source_range == other.source_range
2009                && self.header == other.header
2010                && self.body == other.body
2011        }
2012    }
2013
2014    impl PartialEq for ParsedMarkdownText {
2015        fn eq(&self, other: &Self) -> bool {
2016            self.source_range == other.source_range && self.contents == other.contents
2017        }
2018    }
2019}