markdown_parser.rs

   1use crate::{
   2    markdown_elements::*,
   3    markdown_minifier::{Minifier, MinifierOptions},
   4};
   5use async_recursion::async_recursion;
   6use collections::FxHashMap;
   7use gpui::{DefiniteLength, FontWeight, px, relative};
   8use html5ever::{ParseOpts, local_name, parse_document, tendril::TendrilSink};
   9use language::LanguageRegistry;
  10use markup5ever_rcdom::RcDom;
  11use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
  12use std::{
  13    cell::RefCell, collections::HashMap, mem, ops::Range, path::PathBuf, rc::Rc, sync::Arc, vec,
  14};
  15
  16pub async fn parse_markdown(
  17    markdown_input: &str,
  18    file_location_directory: Option<PathBuf>,
  19    language_registry: Option<Arc<LanguageRegistry>>,
  20) -> ParsedMarkdown {
  21    let mut options = Options::all();
  22    options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
  23
  24    let parser = Parser::new_ext(markdown_input, options);
  25    let parser = MarkdownParser::new(
  26        parser.into_offset_iter().collect(),
  27        file_location_directory,
  28        language_registry,
  29    );
  30    let renderer = parser.parse_document().await;
  31    ParsedMarkdown {
  32        children: renderer.parsed,
  33    }
  34}
  35
  36fn cleanup_html(source: &str) -> Vec<u8> {
  37    let mut writer = std::io::Cursor::new(Vec::new());
  38    let mut reader = std::io::Cursor::new(source);
  39    let mut minify = Minifier::new(
  40        &mut writer,
  41        MinifierOptions {
  42            omit_doctype: true,
  43            collapse_whitespace: true,
  44            ..Default::default()
  45        },
  46    );
  47    if let Ok(()) = minify.minify(&mut reader) {
  48        writer.into_inner()
  49    } else {
  50        source.bytes().collect()
  51    }
  52}
  53
/// Index-based parser over a pre-collected list of pulldown-cmark events.
///
/// Each token pairs an event with the byte range of the source it covers.
struct MarkdownParser<'a> {
    /// The events to parse, each with its source range
    tokens: Vec<(Event<'a>, Range<usize>)>,
    /// The current index in the tokens array
    cursor: usize,
    /// The blocks that we have successfully parsed so far
    parsed: Vec<ParsedMarkdownElement>,
    /// Passed along when identifying link and image destinations
    file_location_directory: Option<PathBuf>,
    /// Used to look up a language for highlighting code blocks, when present
    language_registry: Option<Arc<LanguageRegistry>>,
}
  63
  64#[derive(Debug)]
  65struct ParseHtmlNodeContext {
  66    list_item_depth: u16,
  67}
  68
  69impl Default for ParseHtmlNodeContext {
  70    fn default() -> Self {
  71        Self { list_item_depth: 1 }
  72    }
  73}
  74
  75struct MarkdownListItem {
  76    content: Vec<ParsedMarkdownElement>,
  77    item_type: ParsedMarkdownListItemType,
  78}
  79
  80impl Default for MarkdownListItem {
  81    fn default() -> Self {
  82        Self {
  83            content: Vec::new(),
  84            item_type: ParsedMarkdownListItemType::Unordered,
  85        }
  86    }
  87}
  88
  89impl<'a> MarkdownParser<'a> {
  90    fn new(
  91        tokens: Vec<(Event<'a>, Range<usize>)>,
  92        file_location_directory: Option<PathBuf>,
  93        language_registry: Option<Arc<LanguageRegistry>>,
  94    ) -> Self {
  95        Self {
  96            tokens,
  97            file_location_directory,
  98            language_registry,
  99            cursor: 0,
 100            parsed: vec![],
 101        }
 102    }
 103
 104    fn eof(&self) -> bool {
 105        if self.tokens.is_empty() {
 106            return true;
 107        }
 108        self.cursor >= self.tokens.len() - 1
 109    }
 110
 111    fn peek(&self, steps: usize) -> Option<&(Event<'_>, Range<usize>)> {
 112        if self.eof() || (steps + self.cursor) >= self.tokens.len() {
 113            return self.tokens.last();
 114        }
 115        self.tokens.get(self.cursor + steps)
 116    }
 117
 118    fn previous(&self) -> Option<&(Event<'_>, Range<usize>)> {
 119        if self.cursor == 0 || self.cursor > self.tokens.len() {
 120            return None;
 121        }
 122        self.tokens.get(self.cursor - 1)
 123    }
 124
    /// Returns the token under the cursor; shorthand for `peek(0)`, including
    /// its fallback to the last token once the parser is at the end.
    fn current(&self) -> Option<&(Event<'_>, Range<usize>)> {
        self.peek(0)
    }
 128
 129    fn current_event(&self) -> Option<&Event<'_>> {
 130        self.current().map(|(event, _)| event)
 131    }
 132
 133    fn is_text_like(event: &Event) -> bool {
 134        match event {
 135            Event::Text(_)
 136            // Represent an inline code block
 137            | Event::Code(_)
 138            | Event::Html(_)
 139            | Event::InlineHtml(_)
 140            | Event::FootnoteReference(_)
 141            | Event::Start(Tag::Link { .. })
 142            | Event::Start(Tag::Emphasis)
 143            | Event::Start(Tag::Strong)
 144            | Event::Start(Tag::Strikethrough)
 145            | Event::Start(Tag::Image { .. }) => {
 146                true
 147            }
 148            _ => false,
 149        }
 150    }
 151
 152    async fn parse_document(mut self) -> Self {
 153        while !self.eof() {
 154            if let Some(block) = self.parse_block().await {
 155                self.parsed.extend(block);
 156            } else {
 157                self.cursor += 1;
 158            }
 159        }
 160        self
 161    }
 162
 163    #[async_recursion]
 164    async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
 165        let (current, source_range) = self.current().unwrap();
 166        let source_range = source_range.clone();
 167        match current {
 168            Event::Start(tag) => match tag {
 169                Tag::Paragraph => {
 170                    self.cursor += 1;
 171                    let text = self.parse_text(false, Some(source_range));
 172                    Some(vec![ParsedMarkdownElement::Paragraph(text)])
 173                }
 174                Tag::Heading { level, .. } => {
 175                    let level = *level;
 176                    self.cursor += 1;
 177                    let heading = self.parse_heading(level);
 178                    Some(vec![ParsedMarkdownElement::Heading(heading)])
 179                }
 180                Tag::Table(alignment) => {
 181                    let alignment = alignment.clone();
 182                    self.cursor += 1;
 183                    let table = self.parse_table(alignment);
 184                    Some(vec![ParsedMarkdownElement::Table(table)])
 185                }
 186                Tag::List(order) => {
 187                    let order = *order;
 188                    self.cursor += 1;
 189                    let list = self.parse_list(order).await;
 190                    Some(list)
 191                }
 192                Tag::BlockQuote(_kind) => {
 193                    self.cursor += 1;
 194                    let block_quote = self.parse_block_quote().await;
 195                    Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
 196                }
 197                Tag::CodeBlock(kind) => {
 198                    let language = match kind {
 199                        pulldown_cmark::CodeBlockKind::Indented => None,
 200                        pulldown_cmark::CodeBlockKind::Fenced(language) => {
 201                            if language.is_empty() {
 202                                None
 203                            } else {
 204                                Some(language.to_string())
 205                            }
 206                        }
 207                    };
 208
 209                    self.cursor += 1;
 210
 211                    let code_block = self.parse_code_block(language).await?;
 212                    Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
 213                }
 214                Tag::HtmlBlock => {
 215                    self.cursor += 1;
 216
 217                    Some(self.parse_html_block().await)
 218                }
 219                _ => None,
 220            },
 221            Event::Rule => {
 222                self.cursor += 1;
 223                Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
 224            }
 225            _ => None,
 226        }
 227    }
 228
 229    fn parse_text(
 230        &mut self,
 231        should_complete_on_soft_break: bool,
 232        source_range: Option<Range<usize>>,
 233    ) -> MarkdownParagraph {
 234        let source_range = source_range.unwrap_or_else(|| {
 235            self.current()
 236                .map(|(_, range)| range.clone())
 237                .unwrap_or_default()
 238        });
 239
 240        let mut markdown_text_like = Vec::new();
 241        let mut text = String::new();
 242        let mut bold_depth = 0;
 243        let mut italic_depth = 0;
 244        let mut strikethrough_depth = 0;
 245        let mut link: Option<Link> = None;
 246        let mut image: Option<Image> = None;
 247        let mut region_ranges: Vec<Range<usize>> = vec![];
 248        let mut regions: Vec<ParsedRegion> = vec![];
 249        let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
 250        let mut link_urls: Vec<String> = vec![];
 251        let mut link_ranges: Vec<Range<usize>> = vec![];
 252
 253        loop {
 254            if self.eof() {
 255                break;
 256            }
 257
 258            let (current, _) = self.current().unwrap();
 259            let prev_len = text.len();
 260            match current {
 261                Event::SoftBreak => {
 262                    if should_complete_on_soft_break {
 263                        break;
 264                    }
 265                    text.push(' ');
 266                }
 267
 268                Event::HardBreak => {
 269                    text.push('\n');
 270                }
 271
 272                // We want to ignore any inline HTML tags in the text but keep
 273                // the text between them
 274                Event::InlineHtml(_) => {}
 275
 276                Event::Text(t) => {
 277                    text.push_str(t.as_ref());
 278                    let mut style = MarkdownHighlightStyle::default();
 279
 280                    if bold_depth > 0 {
 281                        style.weight = FontWeight::BOLD;
 282                    }
 283
 284                    if italic_depth > 0 {
 285                        style.italic = true;
 286                    }
 287
 288                    if strikethrough_depth > 0 {
 289                        style.strikethrough = true;
 290                    }
 291
 292                    let last_run_len = if let Some(link) = link.clone() {
 293                        region_ranges.push(prev_len..text.len());
 294                        regions.push(ParsedRegion {
 295                            code: false,
 296                            link: Some(link),
 297                        });
 298                        style.link = true;
 299                        prev_len
 300                    } else {
 301                        // Manually scan for links
 302                        let mut finder = linkify::LinkFinder::new();
 303                        finder.kinds(&[linkify::LinkKind::Url]);
 304                        let mut last_link_len = prev_len;
 305                        for link in finder.links(t) {
 306                            let start = prev_len + link.start();
 307                            let end = prev_len + link.end();
 308                            let range = start..end;
 309                            link_ranges.push(range.clone());
 310                            link_urls.push(link.as_str().to_string());
 311
 312                            // If there is a style before we match a link, we have to add this to the highlighted ranges
 313                            if style != MarkdownHighlightStyle::default() && last_link_len < start {
 314                                highlights.push((
 315                                    last_link_len..start,
 316                                    MarkdownHighlight::Style(style.clone()),
 317                                ));
 318                            }
 319
 320                            highlights.push((
 321                                range.clone(),
 322                                MarkdownHighlight::Style(MarkdownHighlightStyle {
 323                                    underline: true,
 324                                    ..style
 325                                }),
 326                            ));
 327                            region_ranges.push(range.clone());
 328                            regions.push(ParsedRegion {
 329                                code: false,
 330                                link: Some(Link::Web {
 331                                    url: link.as_str().to_string(),
 332                                }),
 333                            });
 334                            last_link_len = end;
 335                        }
 336                        last_link_len
 337                    };
 338
 339                    if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
 340                        let mut new_highlight = true;
 341                        if let Some((last_range, last_style)) = highlights.last_mut()
 342                            && last_range.end == last_run_len
 343                            && last_style == &MarkdownHighlight::Style(style.clone())
 344                        {
 345                            last_range.end = text.len();
 346                            new_highlight = false;
 347                        }
 348                        if new_highlight {
 349                            highlights.push((
 350                                last_run_len..text.len(),
 351                                MarkdownHighlight::Style(style.clone()),
 352                            ));
 353                        }
 354                    }
 355                }
 356                Event::Code(t) => {
 357                    text.push_str(t.as_ref());
 358                    region_ranges.push(prev_len..text.len());
 359
 360                    if link.is_some() {
 361                        highlights.push((
 362                            prev_len..text.len(),
 363                            MarkdownHighlight::Style(MarkdownHighlightStyle {
 364                                link: true,
 365                                ..Default::default()
 366                            }),
 367                        ));
 368                    }
 369                    regions.push(ParsedRegion {
 370                        code: true,
 371                        link: link.clone(),
 372                    });
 373                }
 374                Event::Start(tag) => match tag {
 375                    Tag::Emphasis => italic_depth += 1,
 376                    Tag::Strong => bold_depth += 1,
 377                    Tag::Strikethrough => strikethrough_depth += 1,
 378                    Tag::Link { dest_url, .. } => {
 379                        link = Link::identify(
 380                            self.file_location_directory.clone(),
 381                            dest_url.to_string(),
 382                        );
 383                    }
 384                    Tag::Image { dest_url, .. } => {
 385                        if !text.is_empty() {
 386                            let parsed_regions = MarkdownParagraphChunk::Text(ParsedMarkdownText {
 387                                source_range: source_range.clone(),
 388                                contents: mem::take(&mut text).into(),
 389                                highlights: mem::take(&mut highlights),
 390                                region_ranges: mem::take(&mut region_ranges),
 391                                regions: mem::take(&mut regions),
 392                            });
 393                            markdown_text_like.push(parsed_regions);
 394                        }
 395                        image = Image::identify(
 396                            dest_url.to_string(),
 397                            source_range.clone(),
 398                            self.file_location_directory.clone(),
 399                        );
 400                    }
 401                    _ => {
 402                        break;
 403                    }
 404                },
 405
 406                Event::End(tag) => match tag {
 407                    TagEnd::Emphasis => italic_depth -= 1,
 408                    TagEnd::Strong => bold_depth -= 1,
 409                    TagEnd::Strikethrough => strikethrough_depth -= 1,
 410                    TagEnd::Link => {
 411                        link = None;
 412                    }
 413                    TagEnd::Image => {
 414                        if let Some(mut image) = image.take() {
 415                            if !text.is_empty() {
 416                                image.set_alt_text(std::mem::take(&mut text).into());
 417                                mem::take(&mut highlights);
 418                                mem::take(&mut region_ranges);
 419                                mem::take(&mut regions);
 420                            }
 421                            markdown_text_like.push(MarkdownParagraphChunk::Image(image));
 422                        }
 423                    }
 424                    TagEnd::Paragraph => {
 425                        self.cursor += 1;
 426                        break;
 427                    }
 428                    _ => {
 429                        break;
 430                    }
 431                },
 432                _ => {
 433                    break;
 434                }
 435            }
 436
 437            self.cursor += 1;
 438        }
 439        if !text.is_empty() {
 440            markdown_text_like.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
 441                source_range,
 442                contents: text.into(),
 443                highlights,
 444                regions,
 445                region_ranges,
 446            }));
 447        }
 448        markdown_text_like
 449    }
 450
 451    fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
 452        let (_event, source_range) = self.previous().unwrap();
 453        let source_range = source_range.clone();
 454        let text = self.parse_text(true, None);
 455
 456        // Advance past the heading end tag
 457        self.cursor += 1;
 458
 459        ParsedMarkdownHeading {
 460            source_range,
 461            level: match level {
 462                pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
 463                pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
 464                pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
 465                pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
 466                pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
 467                pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
 468            },
 469            contents: text,
 470        }
 471    }
 472
 473    fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
 474        let (_event, source_range) = self.previous().unwrap();
 475        let source_range = source_range.clone();
 476        let mut header = vec![];
 477        let mut body = vec![];
 478        let mut row_columns = vec![];
 479        let mut in_header = true;
 480        let column_alignments = alignment
 481            .iter()
 482            .map(Self::convert_alignment)
 483            .collect::<Vec<_>>();
 484
 485        loop {
 486            if self.eof() {
 487                break;
 488            }
 489
 490            let (current, source_range) = self.current().unwrap();
 491            let source_range = source_range.clone();
 492            match current {
 493                Event::Start(Tag::TableHead)
 494                | Event::Start(Tag::TableRow)
 495                | Event::End(TagEnd::TableCell) => {
 496                    self.cursor += 1;
 497                }
 498                Event::Start(Tag::TableCell) => {
 499                    self.cursor += 1;
 500                    let cell_contents = self.parse_text(false, Some(source_range));
 501                    row_columns.push(ParsedMarkdownTableColumn {
 502                        col_span: 1,
 503                        row_span: 1,
 504                        is_header: in_header,
 505                        children: cell_contents,
 506                        alignment: column_alignments
 507                            .get(row_columns.len())
 508                            .copied()
 509                            .unwrap_or_default(),
 510                    });
 511                }
 512                Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
 513                    self.cursor += 1;
 514                    let columns = std::mem::take(&mut row_columns);
 515                    if in_header {
 516                        header.push(ParsedMarkdownTableRow { columns: columns });
 517                        in_header = false;
 518                    } else {
 519                        body.push(ParsedMarkdownTableRow::with_columns(columns));
 520                    }
 521                }
 522                Event::End(TagEnd::Table) => {
 523                    self.cursor += 1;
 524                    break;
 525                }
 526                _ => {
 527                    break;
 528                }
 529            }
 530        }
 531
 532        ParsedMarkdownTable {
 533            source_range,
 534            header,
 535            body,
 536        }
 537    }
 538
 539    fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
 540        match alignment {
 541            Alignment::None => ParsedMarkdownTableAlignment::None,
 542            Alignment::Left => ParsedMarkdownTableAlignment::Left,
 543            Alignment::Center => ParsedMarkdownTableAlignment::Center,
 544            Alignment::Right => ParsedMarkdownTableAlignment::Right,
 545        }
 546    }
 547
 548    async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
 549        let (_, list_source_range) = self.previous().unwrap();
 550
 551        let mut items = Vec::new();
 552        let mut items_stack = vec![MarkdownListItem::default()];
 553        let mut depth = 1;
 554        let mut order = order;
 555        let mut order_stack = Vec::new();
 556
 557        let mut insertion_indices = FxHashMap::default();
 558        let mut source_ranges = FxHashMap::default();
 559        let mut start_item_range = list_source_range.clone();
 560
 561        while !self.eof() {
 562            let (current, source_range) = self.current().unwrap();
 563            match current {
 564                Event::Start(Tag::List(new_order)) => {
 565                    if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
 566                        insertion_indices.insert(depth, items.len());
 567                    }
 568
 569                    // We will use the start of the nested list as the end for the current item's range,
 570                    // because we don't care about the hierarchy of list items
 571                    if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
 572                        e.insert(start_item_range.start..source_range.start);
 573                    }
 574
 575                    order_stack.push(order);
 576                    order = *new_order;
 577                    self.cursor += 1;
 578                    depth += 1;
 579                }
 580                Event::End(TagEnd::List(_)) => {
 581                    order = order_stack.pop().flatten();
 582                    self.cursor += 1;
 583                    depth -= 1;
 584
 585                    if depth == 0 {
 586                        break;
 587                    }
 588                }
 589                Event::Start(Tag::Item) => {
 590                    start_item_range = source_range.clone();
 591
 592                    self.cursor += 1;
 593                    items_stack.push(MarkdownListItem::default());
 594
 595                    let mut task_list = None;
 596                    // Check for task list marker (`- [ ]` or `- [x]`)
 597                    if let Some(event) = self.current_event() {
 598                        // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
 599                        if event == &Event::Start(Tag::Paragraph) {
 600                            self.cursor += 1;
 601                        }
 602
 603                        if let Some((Event::TaskListMarker(checked), range)) = self.current() {
 604                            task_list = Some((*checked, range.clone()));
 605                            self.cursor += 1;
 606                        }
 607                    }
 608
 609                    if let Some((event, range)) = self.current() {
 610                        // This is a plain list item.
 611                        // For example `- some text` or `1. [Docs](./docs.md)`
 612                        if MarkdownParser::is_text_like(event) {
 613                            let text = self.parse_text(false, Some(range.clone()));
 614                            let block = ParsedMarkdownElement::Paragraph(text);
 615                            if let Some(content) = items_stack.last_mut() {
 616                                let item_type = if let Some((checked, range)) = task_list {
 617                                    ParsedMarkdownListItemType::Task(checked, range)
 618                                } else if let Some(order) = order {
 619                                    ParsedMarkdownListItemType::Ordered(order)
 620                                } else {
 621                                    ParsedMarkdownListItemType::Unordered
 622                                };
 623                                content.item_type = item_type;
 624                                content.content.push(block);
 625                            }
 626                        } else {
 627                            let block = self.parse_block().await;
 628                            if let Some(block) = block
 629                                && let Some(list_item) = items_stack.last_mut()
 630                            {
 631                                list_item.content.extend(block);
 632                            }
 633                        }
 634                    }
 635
 636                    // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
 637                    if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
 638                        self.cursor += 1;
 639                    }
 640                }
 641                Event::End(TagEnd::Item) => {
 642                    self.cursor += 1;
 643
 644                    if let Some(current) = order {
 645                        order = Some(current + 1);
 646                    }
 647
 648                    if let Some(list_item) = items_stack.pop() {
 649                        let source_range = source_ranges
 650                            .remove(&depth)
 651                            .unwrap_or(start_item_range.clone());
 652
 653                        // We need to remove the last character of the source range, because it includes the newline character
 654                        let source_range = source_range.start..source_range.end - 1;
 655                        let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
 656                            source_range,
 657                            content: list_item.content,
 658                            depth,
 659                            item_type: list_item.item_type,
 660                            nested: false,
 661                        });
 662
 663                        if let Some(index) = insertion_indices.get(&depth) {
 664                            items.insert(*index, item);
 665                            insertion_indices.remove(&depth);
 666                        } else {
 667                            items.push(item);
 668                        }
 669                    }
 670                }
 671                _ => {
 672                    if depth == 0 {
 673                        break;
 674                    }
 675                    // This can only happen if a list item starts with more then one paragraph,
 676                    // or the list item contains blocks that should be rendered after the nested list items
 677                    let block = self.parse_block().await;
 678                    if let Some(block) = block {
 679                        if let Some(list_item) = items_stack.last_mut() {
 680                            // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
 681                            if !insertion_indices.contains_key(&depth) {
 682                                list_item.content.extend(block);
 683                                continue;
 684                            }
 685                        }
 686
 687                        // Otherwise we need to insert the block after all the nested items
 688                        // that have been parsed so far
 689                        items.extend(block);
 690                    } else {
 691                        self.cursor += 1;
 692                    }
 693                }
 694            }
 695        }
 696
 697        items
 698    }
 699
 700    #[async_recursion]
 701    async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
 702        let (_event, source_range) = self.previous().unwrap();
 703        let source_range = source_range.clone();
 704        let mut nested_depth = 1;
 705
 706        let mut children: Vec<ParsedMarkdownElement> = vec![];
 707
 708        while !self.eof() {
 709            let block = self.parse_block().await;
 710
 711            if let Some(block) = block {
 712                children.extend(block);
 713            } else {
 714                break;
 715            }
 716
 717            if self.eof() {
 718                break;
 719            }
 720
 721            let (current, _source_range) = self.current().unwrap();
 722            match current {
 723                // This is a nested block quote.
 724                // Record that we're in a nested block quote and continue parsing.
 725                // We don't need to advance the cursor since the next
 726                // call to `parse_block` will handle it.
 727                Event::Start(Tag::BlockQuote(_kind)) => {
 728                    nested_depth += 1;
 729                }
 730                Event::End(TagEnd::BlockQuote(_kind)) => {
 731                    nested_depth -= 1;
 732                    if nested_depth == 0 {
 733                        self.cursor += 1;
 734                        break;
 735                    }
 736                }
 737                _ => {}
 738            };
 739        }
 740
 741        ParsedMarkdownBlockQuote {
 742            source_range,
 743            children,
 744        }
 745    }
 746
 747    async fn parse_code_block(
 748        &mut self,
 749        language: Option<String>,
 750    ) -> Option<ParsedMarkdownCodeBlock> {
 751        let Some((_event, source_range)) = self.previous() else {
 752            return None;
 753        };
 754
 755        let source_range = source_range.clone();
 756        let mut code = String::new();
 757
 758        while !self.eof() {
 759            let Some((current, _source_range)) = self.current() else {
 760                break;
 761            };
 762
 763            match current {
 764                Event::Text(text) => {
 765                    code.push_str(text);
 766                    self.cursor += 1;
 767                }
 768                Event::End(TagEnd::CodeBlock) => {
 769                    self.cursor += 1;
 770                    break;
 771                }
 772                _ => {
 773                    break;
 774                }
 775            }
 776        }
 777
 778        code = code.strip_suffix('\n').unwrap_or(&code).to_string();
 779
 780        let highlights = if let Some(language) = &language {
 781            if let Some(registry) = &self.language_registry {
 782                let rope: language::Rope = code.as_str().into();
 783                registry
 784                    .language_for_name_or_extension(language)
 785                    .await
 786                    .map(|l| l.highlight_text(&rope, 0..code.len()))
 787                    .ok()
 788            } else {
 789                None
 790            }
 791        } else {
 792            None
 793        };
 794
 795        Some(ParsedMarkdownCodeBlock {
 796            source_range,
 797            contents: code.into(),
 798            language,
 799            highlights,
 800        })
 801    }
 802
 803    async fn parse_html_block(&mut self) -> Vec<ParsedMarkdownElement> {
 804        let mut elements = Vec::new();
 805        let Some((_event, _source_range)) = self.previous() else {
 806            return elements;
 807        };
 808
 809        let mut html_source_range_start = None;
 810        let mut html_source_range_end = None;
 811        let mut html_buffer = String::new();
 812
 813        while !self.eof() {
 814            let Some((current, source_range)) = self.current() else {
 815                break;
 816            };
 817            let source_range = source_range.clone();
 818            match current {
 819                Event::Html(html) => {
 820                    html_source_range_start.get_or_insert(source_range.start);
 821                    html_source_range_end = Some(source_range.end);
 822                    html_buffer.push_str(html);
 823                    self.cursor += 1;
 824                }
 825                Event::End(TagEnd::CodeBlock) => {
 826                    self.cursor += 1;
 827                    break;
 828                }
 829                _ => {
 830                    break;
 831                }
 832            }
 833        }
 834
 835        let bytes = cleanup_html(&html_buffer);
 836
 837        let mut cursor = std::io::Cursor::new(bytes);
 838        if let Ok(dom) = parse_document(RcDom::default(), ParseOpts::default())
 839            .from_utf8()
 840            .read_from(&mut cursor)
 841            && let Some((start, end)) = html_source_range_start.zip(html_source_range_end)
 842        {
 843            self.parse_html_node(
 844                start..end,
 845                &dom.document,
 846                &mut elements,
 847                &ParseHtmlNodeContext::default(),
 848            );
 849        }
 850
 851        elements
 852    }
 853
 854    fn parse_html_node(
 855        &self,
 856        source_range: Range<usize>,
 857        node: &Rc<markup5ever_rcdom::Node>,
 858        elements: &mut Vec<ParsedMarkdownElement>,
 859        context: &ParseHtmlNodeContext,
 860    ) {
 861        match &node.data {
 862            markup5ever_rcdom::NodeData::Document => {
 863                self.consume_children(source_range, node, elements, context);
 864            }
 865            markup5ever_rcdom::NodeData::Text { contents } => {
 866                elements.push(ParsedMarkdownElement::Paragraph(vec![
 867                    MarkdownParagraphChunk::Text(ParsedMarkdownText {
 868                        source_range,
 869                        regions: Vec::default(),
 870                        region_ranges: Vec::default(),
 871                        highlights: Vec::default(),
 872                        contents: contents.borrow().to_string().into(),
 873                    }),
 874                ]));
 875            }
 876            markup5ever_rcdom::NodeData::Comment { .. } => {}
 877            markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
 878                if local_name!("img") == name.local {
 879                    if let Some(image) = self.extract_image(source_range, attrs) {
 880                        elements.push(ParsedMarkdownElement::Image(image));
 881                    }
 882                } else if local_name!("p") == name.local {
 883                    let mut paragraph = MarkdownParagraph::new();
 884                    self.parse_paragraph(source_range, node, &mut paragraph);
 885
 886                    if !paragraph.is_empty() {
 887                        elements.push(ParsedMarkdownElement::Paragraph(paragraph));
 888                    }
 889                } else if matches!(
 890                    name.local,
 891                    local_name!("h1")
 892                        | local_name!("h2")
 893                        | local_name!("h3")
 894                        | local_name!("h4")
 895                        | local_name!("h5")
 896                        | local_name!("h6")
 897                ) {
 898                    let mut paragraph = MarkdownParagraph::new();
 899                    self.consume_paragraph(source_range.clone(), node, &mut paragraph);
 900
 901                    if !paragraph.is_empty() {
 902                        elements.push(ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
 903                            source_range,
 904                            level: match name.local {
 905                                local_name!("h1") => HeadingLevel::H1,
 906                                local_name!("h2") => HeadingLevel::H2,
 907                                local_name!("h3") => HeadingLevel::H3,
 908                                local_name!("h4") => HeadingLevel::H4,
 909                                local_name!("h5") => HeadingLevel::H5,
 910                                local_name!("h6") => HeadingLevel::H6,
 911                                _ => unreachable!(),
 912                            },
 913                            contents: paragraph,
 914                        }));
 915                    }
 916                } else if local_name!("ul") == name.local || local_name!("ol") == name.local {
 917                    if let Some(list_items) = self.extract_html_list(
 918                        node,
 919                        local_name!("ol") == name.local,
 920                        context.list_item_depth,
 921                        source_range,
 922                    ) {
 923                        elements.extend(list_items);
 924                    }
 925                } else if local_name!("blockquote") == name.local {
 926                    if let Some(blockquote) = self.extract_html_blockquote(node, source_range) {
 927                        elements.push(ParsedMarkdownElement::BlockQuote(blockquote));
 928                    }
 929                } else if local_name!("table") == name.local {
 930                    if let Some(table) = self.extract_html_table(node, source_range) {
 931                        elements.push(ParsedMarkdownElement::Table(table));
 932                    }
 933                } else {
 934                    self.consume_children(source_range, node, elements, context);
 935                }
 936            }
 937            _ => {}
 938        }
 939    }
 940
 941    fn parse_paragraph(
 942        &self,
 943        source_range: Range<usize>,
 944        node: &Rc<markup5ever_rcdom::Node>,
 945        paragraph: &mut MarkdownParagraph,
 946    ) {
 947        match &node.data {
 948            markup5ever_rcdom::NodeData::Text { contents } => {
 949                paragraph.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
 950                    source_range,
 951                    regions: Vec::default(),
 952                    region_ranges: Vec::default(),
 953                    highlights: Vec::default(),
 954                    contents: contents.borrow().to_string().into(),
 955                }));
 956            }
 957            markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
 958                if local_name!("img") == name.local {
 959                    if let Some(image) = self.extract_image(source_range, attrs) {
 960                        paragraph.push(MarkdownParagraphChunk::Image(image));
 961                    }
 962                } else {
 963                    self.consume_paragraph(source_range, node, paragraph);
 964                }
 965            }
 966            _ => {}
 967        }
 968    }
 969
 970    fn consume_paragraph(
 971        &self,
 972        source_range: Range<usize>,
 973        node: &Rc<markup5ever_rcdom::Node>,
 974        paragraph: &mut MarkdownParagraph,
 975    ) {
 976        for node in node.children.borrow().iter() {
 977            self.parse_paragraph(source_range.clone(), node, paragraph);
 978        }
 979    }
 980
 981    fn parse_table_row(
 982        &self,
 983        source_range: Range<usize>,
 984        node: &Rc<markup5ever_rcdom::Node>,
 985    ) -> Option<ParsedMarkdownTableRow> {
 986        let mut columns = Vec::new();
 987
 988        match &node.data {
 989            markup5ever_rcdom::NodeData::Element { name, .. } => {
 990                if local_name!("tr") != name.local {
 991                    return None;
 992                }
 993
 994                for node in node.children.borrow().iter() {
 995                    if let Some(column) = self.parse_table_column(source_range.clone(), node) {
 996                        columns.push(column);
 997                    }
 998                }
 999            }
1000            _ => {}
1001        }
1002
1003        if columns.is_empty() {
1004            None
1005        } else {
1006            Some(ParsedMarkdownTableRow { columns })
1007        }
1008    }
1009
1010    fn parse_table_column(
1011        &self,
1012        source_range: Range<usize>,
1013        node: &Rc<markup5ever_rcdom::Node>,
1014    ) -> Option<ParsedMarkdownTableColumn> {
1015        match &node.data {
1016            markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
1017                if !matches!(name.local, local_name!("th") | local_name!("td")) {
1018                    return None;
1019                }
1020
1021                let mut children = MarkdownParagraph::new();
1022                self.consume_paragraph(source_range, node, &mut children);
1023
1024                let is_header = matches!(name.local, local_name!("th"));
1025
1026                Some(ParsedMarkdownTableColumn {
1027                    col_span: std::cmp::max(
1028                        Self::attr_value(attrs, local_name!("colspan"))
1029                            .and_then(|span| span.parse().ok())
1030                            .unwrap_or(1),
1031                        1,
1032                    ),
1033                    row_span: std::cmp::max(
1034                        Self::attr_value(attrs, local_name!("rowspan"))
1035                            .and_then(|span| span.parse().ok())
1036                            .unwrap_or(1),
1037                        1,
1038                    ),
1039                    is_header,
1040                    children,
1041                    alignment: Self::attr_value(attrs, local_name!("align"))
1042                        .and_then(|align| match align.as_str() {
1043                            "left" => Some(ParsedMarkdownTableAlignment::Left),
1044                            "center" => Some(ParsedMarkdownTableAlignment::Center),
1045                            "right" => Some(ParsedMarkdownTableAlignment::Right),
1046                            _ => None,
1047                        })
1048                        .unwrap_or_else(|| {
1049                            if is_header {
1050                                ParsedMarkdownTableAlignment::Center
1051                            } else {
1052                                ParsedMarkdownTableAlignment::default()
1053                            }
1054                        }),
1055                })
1056            }
1057            _ => None,
1058        }
1059    }
1060
1061    fn consume_children(
1062        &self,
1063        source_range: Range<usize>,
1064        node: &Rc<markup5ever_rcdom::Node>,
1065        elements: &mut Vec<ParsedMarkdownElement>,
1066        context: &ParseHtmlNodeContext,
1067    ) {
1068        for node in node.children.borrow().iter() {
1069            self.parse_html_node(source_range.clone(), node, elements, context);
1070        }
1071    }
1072
1073    fn attr_value(
1074        attrs: &RefCell<Vec<html5ever::Attribute>>,
1075        name: html5ever::LocalName,
1076    ) -> Option<String> {
1077        attrs.borrow().iter().find_map(|attr| {
1078            if attr.name.local == name {
1079                Some(attr.value.to_string())
1080            } else {
1081                None
1082            }
1083        })
1084    }
1085
1086    fn extract_styles_from_attributes(
1087        attrs: &RefCell<Vec<html5ever::Attribute>>,
1088    ) -> HashMap<String, String> {
1089        let mut styles = HashMap::new();
1090
1091        if let Some(style) = Self::attr_value(attrs, local_name!("style")) {
1092            for decl in style.split(';') {
1093                let mut parts = decl.splitn(2, ':');
1094                if let Some((key, value)) = parts.next().zip(parts.next()) {
1095                    styles.insert(
1096                        key.trim().to_lowercase().to_string(),
1097                        value.trim().to_string(),
1098                    );
1099                }
1100            }
1101        }
1102
1103        styles
1104    }
1105
1106    fn extract_image(
1107        &self,
1108        source_range: Range<usize>,
1109        attrs: &RefCell<Vec<html5ever::Attribute>>,
1110    ) -> Option<Image> {
1111        let src = Self::attr_value(attrs, local_name!("src"))?;
1112
1113        let mut image = Image::identify(src, source_range, self.file_location_directory.clone())?;
1114
1115        if let Some(alt) = Self::attr_value(attrs, local_name!("alt")) {
1116            image.set_alt_text(alt.into());
1117        }
1118
1119        let styles = Self::extract_styles_from_attributes(attrs);
1120
1121        if let Some(width) = Self::attr_value(attrs, local_name!("width"))
1122            .or_else(|| styles.get("width").cloned())
1123            .and_then(|width| Self::parse_html_element_dimension(&width))
1124        {
1125            image.set_width(width);
1126        }
1127
1128        if let Some(height) = Self::attr_value(attrs, local_name!("height"))
1129            .or_else(|| styles.get("height").cloned())
1130            .and_then(|height| Self::parse_html_element_dimension(&height))
1131        {
1132            image.set_height(height);
1133        }
1134
1135        Some(image)
1136    }
1137
1138    fn extract_html_list(
1139        &self,
1140        node: &Rc<markup5ever_rcdom::Node>,
1141        ordered: bool,
1142        depth: u16,
1143        source_range: Range<usize>,
1144    ) -> Option<Vec<ParsedMarkdownElement>> {
1145        let mut list_items = Vec::with_capacity(node.children.borrow().len());
1146
1147        for (index, node) in node.children.borrow().iter().enumerate() {
1148            match &node.data {
1149                markup5ever_rcdom::NodeData::Element { name, .. } => {
1150                    if local_name!("li") != name.local {
1151                        continue;
1152                    }
1153
1154                    let mut content = Vec::new();
1155                    self.consume_children(
1156                        source_range.clone(),
1157                        node,
1158                        &mut content,
1159                        &ParseHtmlNodeContext {
1160                            list_item_depth: depth + 1,
1161                        },
1162                    );
1163
1164                    if !content.is_empty() {
1165                        list_items.push(ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
1166                            depth,
1167                            source_range: source_range.clone(),
1168                            item_type: if ordered {
1169                                ParsedMarkdownListItemType::Ordered(index as u64 + 1)
1170                            } else {
1171                                ParsedMarkdownListItemType::Unordered
1172                            },
1173                            content,
1174                            nested: true,
1175                        }));
1176                    }
1177                }
1178                _ => {}
1179            }
1180        }
1181
1182        if list_items.is_empty() {
1183            None
1184        } else {
1185            Some(list_items)
1186        }
1187    }
1188
1189    fn parse_html_element_dimension(value: &str) -> Option<DefiniteLength> {
1190        if value.ends_with("%") {
1191            value
1192                .trim_end_matches("%")
1193                .parse::<f32>()
1194                .ok()
1195                .map(|value| relative(value / 100.))
1196        } else {
1197            value
1198                .trim_end_matches("px")
1199                .parse()
1200                .ok()
1201                .map(|value| px(value).into())
1202        }
1203    }
1204
1205    fn extract_html_blockquote(
1206        &self,
1207        node: &Rc<markup5ever_rcdom::Node>,
1208        source_range: Range<usize>,
1209    ) -> Option<ParsedMarkdownBlockQuote> {
1210        let mut children = Vec::new();
1211        self.consume_children(
1212            source_range.clone(),
1213            node,
1214            &mut children,
1215            &ParseHtmlNodeContext::default(),
1216        );
1217
1218        if children.is_empty() {
1219            None
1220        } else {
1221            Some(ParsedMarkdownBlockQuote {
1222                children,
1223                source_range,
1224            })
1225        }
1226    }
1227
1228    fn extract_html_table(
1229        &self,
1230        node: &Rc<markup5ever_rcdom::Node>,
1231        source_range: Range<usize>,
1232    ) -> Option<ParsedMarkdownTable> {
1233        let mut header_rows = Vec::new();
1234        let mut body_rows = Vec::new();
1235
1236        // node should be a thead or tbody element
1237        for node in node.children.borrow().iter() {
1238            match &node.data {
1239                markup5ever_rcdom::NodeData::Element { name, .. } => {
1240                    if local_name!("thead") == name.local {
1241                        // node should be a tr element
1242                        for node in node.children.borrow().iter() {
1243                            if let Some(row) = self.parse_table_row(source_range.clone(), node) {
1244                                header_rows.push(row);
1245                            }
1246                        }
1247                    } else if local_name!("tbody") == name.local {
1248                        // node should be a tr element
1249                        for node in node.children.borrow().iter() {
1250                            if let Some(row) = self.parse_table_row(source_range.clone(), node) {
1251                                body_rows.push(row);
1252                            }
1253                        }
1254                    }
1255                }
1256                _ => {}
1257            }
1258        }
1259
1260        if !header_rows.is_empty() || !body_rows.is_empty() {
1261            Some(ParsedMarkdownTable {
1262                source_range,
1263                body: body_rows,
1264                header: header_rows,
1265            })
1266        } else {
1267            None
1268        }
1269    }
1270}
1271
1272#[cfg(test)]
1273mod tests {
1274    use super::*;
1275    use ParsedMarkdownListItemType::*;
1276    use core::panic;
1277    use gpui::{AbsoluteLength, BackgroundExecutor, DefiniteLength};
1278    use language::{
1279        HighlightId, Language, LanguageConfig, LanguageMatcher, LanguageRegistry, tree_sitter_rust,
1280    };
1281    use pretty_assertions::assert_eq;
1282
1283    async fn parse(input: &str) -> ParsedMarkdown {
1284        parse_markdown(input, None, None).await
1285    }
1286
1287    #[gpui::test]
1288    async fn test_headings() {
1289        let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
1290
1291        assert_eq!(
1292            parsed.children,
1293            vec![
1294                h1(text("Heading one", 2..13), 0..14),
1295                h2(text("Heading two", 17..28), 14..29),
1296                h3(text("Heading three", 33..46), 29..46),
1297            ]
1298        );
1299    }
1300
1301    #[gpui::test]
1302    async fn test_newlines_dont_new_paragraphs() {
1303        let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
1304
1305        assert_eq!(
1306            parsed.children,
1307            vec![p("Some text that is bolded and italicized", 0..46)]
1308        );
1309    }
1310
1311    #[gpui::test]
1312    async fn test_heading_with_paragraph() {
1313        let parsed = parse("# Zed\nThe editor").await;
1314
1315        assert_eq!(
1316            parsed.children,
1317            vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
1318        );
1319    }
1320
1321    #[gpui::test]
1322    async fn test_double_newlines_do_new_paragraphs() {
1323        let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
1324
1325        assert_eq!(
1326            parsed.children,
1327            vec![
1328                p("Some text that is bolded", 0..29),
1329                p("and italicized", 31..47),
1330            ]
1331        );
1332    }
1333
1334    #[gpui::test]
1335    async fn test_bold_italic_text() {
1336        let parsed = parse("Some text **that is bolded** and *italicized*").await;
1337
1338        assert_eq!(
1339            parsed.children,
1340            vec![p("Some text that is bolded and italicized", 0..45)]
1341        );
1342    }
1343
1344    #[gpui::test]
1345    async fn test_nested_bold_strikethrough_text() {
1346        let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
1347
1348        assert_eq!(parsed.children.len(), 1);
1349        assert_eq!(
1350            parsed.children[0],
1351            ParsedMarkdownElement::Paragraph(vec![MarkdownParagraphChunk::Text(
1352                ParsedMarkdownText {
1353                    source_range: 0..35,
1354                    contents: "Some bostrikethroughld text".into(),
1355                    highlights: Vec::new(),
1356                    region_ranges: Vec::new(),
1357                    regions: Vec::new(),
1358                }
1359            )])
1360        );
1361
1362        let new_text = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1363            text
1364        } else {
1365            panic!("Expected a paragraph");
1366        };
1367
1368        let paragraph = if let MarkdownParagraphChunk::Text(text) = &new_text[0] {
1369            text
1370        } else {
1371            panic!("Expected a text");
1372        };
1373
1374        assert_eq!(
1375            paragraph.highlights,
1376            vec![
1377                (
1378                    5..7,
1379                    MarkdownHighlight::Style(MarkdownHighlightStyle {
1380                        weight: FontWeight::BOLD,
1381                        ..Default::default()
1382                    }),
1383                ),
1384                (
1385                    7..20,
1386                    MarkdownHighlight::Style(MarkdownHighlightStyle {
1387                        weight: FontWeight::BOLD,
1388                        strikethrough: true,
1389                        ..Default::default()
1390                    }),
1391                ),
1392                (
1393                    20..22,
1394                    MarkdownHighlight::Style(MarkdownHighlightStyle {
1395                        weight: FontWeight::BOLD,
1396                        ..Default::default()
1397                    }),
1398                ),
1399            ]
1400        );
1401    }
1402
1403    #[gpui::test]
1404    async fn test_text_with_inline_html() {
1405        let parsed = parse("This is a paragraph with an inline HTML <sometag>tag</sometag>.").await;
1406
1407        assert_eq!(
1408            parsed.children,
1409            vec![p("This is a paragraph with an inline HTML tag.", 0..63),],
1410        );
1411    }
1412
1413    #[gpui::test]
1414    async fn test_raw_links_detection() {
1415        let parsed = parse("Checkout this https://zed.dev link").await;
1416
1417        assert_eq!(
1418            parsed.children,
1419            vec![p("Checkout this https://zed.dev link", 0..34)]
1420        );
1421    }
1422
1423    #[gpui::test]
1424    async fn test_empty_image() {
1425        let parsed = parse("![]()").await;
1426
1427        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1428            text
1429        } else {
1430            panic!("Expected a paragraph");
1431        };
1432        assert_eq!(paragraph.len(), 0);
1433    }
1434
1435    #[gpui::test]
1436    async fn test_image_links_detection() {
1437        let parsed = parse("![test](https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png)").await;
1438
1439        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1440            text
1441        } else {
1442            panic!("Expected a paragraph");
1443        };
1444        assert_eq!(
1445                paragraph[0],
1446                MarkdownParagraphChunk::Image(Image {
1447                    source_range: 0..111,
1448                    link: Link::Web {
1449                        url: "https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png".to_string(),
1450                    },
1451                    alt_text: Some("test".into()),
1452                    height: None,
1453                    width: None,
1454                },)
1455            );
1456    }
1457
1458    #[gpui::test]
1459    async fn test_image_alt_text() {
1460        let parsed = parse("[![Zed](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/zed-industries/zed/main/assets/badge/v0.json)](https://zed.dev)\n ").await;
1461
1462        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1463            text
1464        } else {
1465            panic!("Expected a paragraph");
1466        };
1467        assert_eq!(
1468                    paragraph[0],
1469                    MarkdownParagraphChunk::Image(Image {
1470                        source_range: 0..142,
1471                        link: Link::Web {
1472                            url: "https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/zed-industries/zed/main/assets/badge/v0.json".to_string(),
1473                        },
1474                        alt_text: Some("Zed".into()),
1475                        height: None,
1476                        width: None,
1477                    },)
1478                );
1479    }
1480
1481    #[gpui::test]
1482    async fn test_image_without_alt_text() {
1483        let parsed = parse("![](http://example.com/foo.png)").await;
1484
1485        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1486            text
1487        } else {
1488            panic!("Expected a paragraph");
1489        };
1490        assert_eq!(
1491            paragraph[0],
1492            MarkdownParagraphChunk::Image(Image {
1493                source_range: 0..31,
1494                link: Link::Web {
1495                    url: "http://example.com/foo.png".to_string(),
1496                },
1497                alt_text: None,
1498                height: None,
1499                width: None,
1500            },)
1501        );
1502    }
1503
1504    #[gpui::test]
1505    async fn test_image_with_alt_text_containing_formatting() {
1506        let parsed = parse("![foo *bar* baz](http://example.com/foo.png)").await;
1507
1508        let ParsedMarkdownElement::Paragraph(chunks) = &parsed.children[0] else {
1509            panic!("Expected a paragraph");
1510        };
1511        assert_eq!(
1512            chunks,
1513            &[MarkdownParagraphChunk::Image(Image {
1514                source_range: 0..44,
1515                link: Link::Web {
1516                    url: "http://example.com/foo.png".to_string(),
1517                },
1518                alt_text: Some("foo bar baz".into()),
1519                height: None,
1520                width: None,
1521            }),],
1522        );
1523    }
1524
1525    #[gpui::test]
1526    async fn test_images_with_text_in_between() {
1527        let parsed = parse(
1528            "![foo](http://example.com/foo.png)\nLorem Ipsum\n![bar](http://example.com/bar.png)",
1529        )
1530        .await;
1531
1532        let chunks = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1533            text
1534        } else {
1535            panic!("Expected a paragraph");
1536        };
1537        assert_eq!(
1538            chunks,
1539            &vec![
1540                MarkdownParagraphChunk::Image(Image {
1541                    source_range: 0..81,
1542                    link: Link::Web {
1543                        url: "http://example.com/foo.png".to_string(),
1544                    },
1545                    alt_text: Some("foo".into()),
1546                    height: None,
1547                    width: None,
1548                }),
1549                MarkdownParagraphChunk::Text(ParsedMarkdownText {
1550                    source_range: 0..81,
1551                    contents: " Lorem Ipsum ".into(),
1552                    highlights: Vec::new(),
1553                    region_ranges: Vec::new(),
1554                    regions: Vec::new(),
1555                }),
1556                MarkdownParagraphChunk::Image(Image {
1557                    source_range: 0..81,
1558                    link: Link::Web {
1559                        url: "http://example.com/bar.png".to_string(),
1560                    },
1561                    alt_text: Some("bar".into()),
1562                    height: None,
1563                    width: None,
1564                })
1565            ]
1566        );
1567    }
1568
1569    #[test]
1570    fn test_parse_html_element_dimension() {
1571        // Test percentage values
1572        assert_eq!(
1573            MarkdownParser::parse_html_element_dimension("50%"),
1574            Some(DefiniteLength::Fraction(0.5))
1575        );
1576        assert_eq!(
1577            MarkdownParser::parse_html_element_dimension("100%"),
1578            Some(DefiniteLength::Fraction(1.0))
1579        );
1580        assert_eq!(
1581            MarkdownParser::parse_html_element_dimension("25%"),
1582            Some(DefiniteLength::Fraction(0.25))
1583        );
1584        assert_eq!(
1585            MarkdownParser::parse_html_element_dimension("0%"),
1586            Some(DefiniteLength::Fraction(0.0))
1587        );
1588
1589        // Test pixel values
1590        assert_eq!(
1591            MarkdownParser::parse_html_element_dimension("100px"),
1592            Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.0))))
1593        );
1594        assert_eq!(
1595            MarkdownParser::parse_html_element_dimension("50px"),
1596            Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(50.0))))
1597        );
1598        assert_eq!(
1599            MarkdownParser::parse_html_element_dimension("0px"),
1600            Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(0.0))))
1601        );
1602
1603        // Test values without units (should be treated as pixels)
1604        assert_eq!(
1605            MarkdownParser::parse_html_element_dimension("100"),
1606            Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.0))))
1607        );
1608        assert_eq!(
1609            MarkdownParser::parse_html_element_dimension("42"),
1610            Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(42.0))))
1611        );
1612
1613        // Test invalid values
1614        assert_eq!(
1615            MarkdownParser::parse_html_element_dimension("invalid"),
1616            None
1617        );
1618        assert_eq!(MarkdownParser::parse_html_element_dimension("px"), None);
1619        assert_eq!(MarkdownParser::parse_html_element_dimension("%"), None);
1620        assert_eq!(MarkdownParser::parse_html_element_dimension(""), None);
1621        assert_eq!(MarkdownParser::parse_html_element_dimension("abc%"), None);
1622        assert_eq!(MarkdownParser::parse_html_element_dimension("abcpx"), None);
1623
1624        // Test decimal values
1625        assert_eq!(
1626            MarkdownParser::parse_html_element_dimension("50.5%"),
1627            Some(DefiniteLength::Fraction(0.505))
1628        );
1629        assert_eq!(
1630            MarkdownParser::parse_html_element_dimension("100.25px"),
1631            Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.25))))
1632        );
1633        assert_eq!(
1634            MarkdownParser::parse_html_element_dimension("42.0"),
1635            Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(42.0))))
1636        );
1637    }
1638
1639    #[gpui::test]
1640    async fn test_html_unordered_list() {
1641        let parsed = parse(
1642            "<ul>
1643              <li>Item 1</li>
1644              <li>Item 2</li>
1645            </ul>",
1646        )
1647        .await;
1648
1649        assert_eq!(
1650            ParsedMarkdown {
1651                children: vec![
1652                    nested_list_item(
1653                        0..82,
1654                        1,
1655                        ParsedMarkdownListItemType::Unordered,
1656                        vec![ParsedMarkdownElement::Paragraph(text("Item 1", 0..82))]
1657                    ),
1658                    nested_list_item(
1659                        0..82,
1660                        1,
1661                        ParsedMarkdownListItemType::Unordered,
1662                        vec![ParsedMarkdownElement::Paragraph(text("Item 2", 0..82))]
1663                    ),
1664                ]
1665            },
1666            parsed
1667        );
1668    }
1669
1670    #[gpui::test]
1671    async fn test_html_ordered_list() {
1672        let parsed = parse(
1673            "<ol>
1674              <li>Item 1</li>
1675              <li>Item 2</li>
1676            </ol>",
1677        )
1678        .await;
1679
1680        assert_eq!(
1681            ParsedMarkdown {
1682                children: vec![
1683                    nested_list_item(
1684                        0..82,
1685                        1,
1686                        ParsedMarkdownListItemType::Ordered(1),
1687                        vec![ParsedMarkdownElement::Paragraph(text("Item 1", 0..82))]
1688                    ),
1689                    nested_list_item(
1690                        0..82,
1691                        1,
1692                        ParsedMarkdownListItemType::Ordered(2),
1693                        vec![ParsedMarkdownElement::Paragraph(text("Item 2", 0..82))]
1694                    ),
1695                ]
1696            },
1697            parsed
1698        );
1699    }
1700
1701    #[gpui::test]
1702    async fn test_html_nested_ordered_list() {
1703        let parsed = parse(
1704            "<ol>
1705              <li>Item 1</li>
1706              <li>Item 2
1707                <ol>
1708                  <li>Sub-Item 1</li>
1709                  <li>Sub-Item 2</li>
1710                </ol>
1711              </li>
1712            </ol>",
1713        )
1714        .await;
1715
1716        assert_eq!(
1717            ParsedMarkdown {
1718                children: vec![
1719                    nested_list_item(
1720                        0..216,
1721                        1,
1722                        ParsedMarkdownListItemType::Ordered(1),
1723                        vec![ParsedMarkdownElement::Paragraph(text("Item 1", 0..216))]
1724                    ),
1725                    nested_list_item(
1726                        0..216,
1727                        1,
1728                        ParsedMarkdownListItemType::Ordered(2),
1729                        vec![
1730                            ParsedMarkdownElement::Paragraph(text("Item 2", 0..216)),
1731                            nested_list_item(
1732                                0..216,
1733                                2,
1734                                ParsedMarkdownListItemType::Ordered(1),
1735                                vec![ParsedMarkdownElement::Paragraph(text("Sub-Item 1", 0..216))]
1736                            ),
1737                            nested_list_item(
1738                                0..216,
1739                                2,
1740                                ParsedMarkdownListItemType::Ordered(2),
1741                                vec![ParsedMarkdownElement::Paragraph(text("Sub-Item 2", 0..216))]
1742                            ),
1743                        ]
1744                    ),
1745                ]
1746            },
1747            parsed
1748        );
1749    }
1750
1751    #[gpui::test]
1752    async fn test_html_nested_unordered_list() {
1753        let parsed = parse(
1754            "<ul>
1755              <li>Item 1</li>
1756              <li>Item 2
1757                <ul>
1758                  <li>Sub-Item 1</li>
1759                  <li>Sub-Item 2</li>
1760                </ul>
1761              </li>
1762            </ul>",
1763        )
1764        .await;
1765
1766        assert_eq!(
1767            ParsedMarkdown {
1768                children: vec![
1769                    nested_list_item(
1770                        0..216,
1771                        1,
1772                        ParsedMarkdownListItemType::Unordered,
1773                        vec![ParsedMarkdownElement::Paragraph(text("Item 1", 0..216))]
1774                    ),
1775                    nested_list_item(
1776                        0..216,
1777                        1,
1778                        ParsedMarkdownListItemType::Unordered,
1779                        vec![
1780                            ParsedMarkdownElement::Paragraph(text("Item 2", 0..216)),
1781                            nested_list_item(
1782                                0..216,
1783                                2,
1784                                ParsedMarkdownListItemType::Unordered,
1785                                vec![ParsedMarkdownElement::Paragraph(text("Sub-Item 1", 0..216))]
1786                            ),
1787                            nested_list_item(
1788                                0..216,
1789                                2,
1790                                ParsedMarkdownListItemType::Unordered,
1791                                vec![ParsedMarkdownElement::Paragraph(text("Sub-Item 2", 0..216))]
1792                            ),
1793                        ]
1794                    ),
1795                ]
1796            },
1797            parsed
1798        );
1799    }
1800
1801    #[gpui::test]
1802    async fn test_inline_html_image_tag() {
1803        let parsed =
1804            parse("<p>Some text<img src=\"http://example.com/foo.png\" /> some more text</p>")
1805                .await;
1806
1807        assert_eq!(
1808            ParsedMarkdown {
1809                children: vec![ParsedMarkdownElement::Paragraph(vec![
1810                    MarkdownParagraphChunk::Text(ParsedMarkdownText {
1811                        source_range: 0..71,
1812                        contents: "Some text".into(),
1813                        highlights: Default::default(),
1814                        region_ranges: Default::default(),
1815                        regions: Default::default()
1816                    }),
1817                    MarkdownParagraphChunk::Image(Image {
1818                        source_range: 0..71,
1819                        link: Link::Web {
1820                            url: "http://example.com/foo.png".to_string(),
1821                        },
1822                        alt_text: None,
1823                        height: None,
1824                        width: None,
1825                    }),
1826                    MarkdownParagraphChunk::Text(ParsedMarkdownText {
1827                        source_range: 0..71,
1828                        contents: " some more text".into(),
1829                        highlights: Default::default(),
1830                        region_ranges: Default::default(),
1831                        regions: Default::default()
1832                    }),
1833                ])]
1834            },
1835            parsed
1836        );
1837    }
1838
1839    #[gpui::test]
1840    async fn test_html_block_quote() {
1841        let parsed = parse(
1842            "<blockquote>
1843                <p>some description</p>
1844            </blockquote>",
1845        )
1846        .await;
1847
1848        assert_eq!(
1849            ParsedMarkdown {
1850                children: vec![block_quote(
1851                    vec![ParsedMarkdownElement::Paragraph(text(
1852                        "some description",
1853                        0..78
1854                    ))],
1855                    0..78,
1856                )]
1857            },
1858            parsed
1859        );
1860    }
1861
1862    #[gpui::test]
1863    async fn test_html_nested_block_quote() {
1864        let parsed = parse(
1865            "<blockquote>
1866                <p>some description</p>
1867                <blockquote>
1868                <p>second description</p>
1869                </blockquote>
1870            </blockquote>",
1871        )
1872        .await;
1873
1874        assert_eq!(
1875            ParsedMarkdown {
1876                children: vec![block_quote(
1877                    vec![
1878                        ParsedMarkdownElement::Paragraph(text("some description", 0..179)),
1879                        block_quote(
1880                            vec![ParsedMarkdownElement::Paragraph(text(
1881                                "second description",
1882                                0..179
1883                            ))],
1884                            0..179,
1885                        )
1886                    ],
1887                    0..179,
1888                )]
1889            },
1890            parsed
1891        );
1892    }
1893
1894    #[gpui::test]
1895    async fn test_html_table() {
1896        let parsed = parse(
1897            "<table>
1898          <thead>
1899            <tr>
1900              <th>Id</th>
1901              <th>Name</th>
1902            </tr>
1903          </thead>
1904          <tbody>
1905            <tr>
1906              <td>1</td>
1907              <td>Chris</td>
1908            </tr>
1909            <tr>
1910              <td>2</td>
1911              <td>Dennis</td>
1912            </tr>
1913          </tbody>
1914        </table>",
1915        )
1916        .await;
1917
1918        assert_eq!(
1919            ParsedMarkdown {
1920                children: vec![ParsedMarkdownElement::Table(table(
1921                    0..366,
1922                    vec![row(vec![
1923                        column(
1924                            1,
1925                            1,
1926                            true,
1927                            text("Id", 0..366),
1928                            ParsedMarkdownTableAlignment::Center
1929                        ),
1930                        column(
1931                            1,
1932                            1,
1933                            true,
1934                            text("Name ", 0..366),
1935                            ParsedMarkdownTableAlignment::Center
1936                        )
1937                    ])],
1938                    vec![
1939                        row(vec![
1940                            column(
1941                                1,
1942                                1,
1943                                false,
1944                                text("1", 0..366),
1945                                ParsedMarkdownTableAlignment::None
1946                            ),
1947                            column(
1948                                1,
1949                                1,
1950                                false,
1951                                text("Chris", 0..366),
1952                                ParsedMarkdownTableAlignment::None
1953                            )
1954                        ]),
1955                        row(vec![
1956                            column(
1957                                1,
1958                                1,
1959                                false,
1960                                text("2", 0..366),
1961                                ParsedMarkdownTableAlignment::None
1962                            ),
1963                            column(
1964                                1,
1965                                1,
1966                                false,
1967                                text("Dennis", 0..366),
1968                                ParsedMarkdownTableAlignment::None
1969                            )
1970                        ]),
1971                    ],
1972                ))],
1973            },
1974            parsed
1975        );
1976    }
1977
1978    #[gpui::test]
1979    async fn test_html_table_without_headings() {
1980        let parsed = parse(
1981            "<table>
1982          <tbody>
1983            <tr>
1984              <td>1</td>
1985              <td>Chris</td>
1986            </tr>
1987            <tr>
1988              <td>2</td>
1989              <td>Dennis</td>
1990            </tr>
1991          </tbody>
1992        </table>",
1993        )
1994        .await;
1995
1996        assert_eq!(
1997            ParsedMarkdown {
1998                children: vec![ParsedMarkdownElement::Table(table(
1999                    0..240,
2000                    vec![],
2001                    vec![
2002                        row(vec![
2003                            column(
2004                                1,
2005                                1,
2006                                false,
2007                                text("1", 0..240),
2008                                ParsedMarkdownTableAlignment::None
2009                            ),
2010                            column(
2011                                1,
2012                                1,
2013                                false,
2014                                text("Chris", 0..240),
2015                                ParsedMarkdownTableAlignment::None
2016                            )
2017                        ]),
2018                        row(vec![
2019                            column(
2020                                1,
2021                                1,
2022                                false,
2023                                text("2", 0..240),
2024                                ParsedMarkdownTableAlignment::None
2025                            ),
2026                            column(
2027                                1,
2028                                1,
2029                                false,
2030                                text("Dennis", 0..240),
2031                                ParsedMarkdownTableAlignment::None
2032                            )
2033                        ]),
2034                    ],
2035                ))],
2036            },
2037            parsed
2038        );
2039    }
2040
2041    #[gpui::test]
2042    async fn test_html_table_without_body() {
2043        let parsed = parse(
2044            "<table>
2045          <thead>
2046            <tr>
2047              <th>Id</th>
2048              <th>Name</th>
2049            </tr>
2050          </thead>
2051        </table>",
2052        )
2053        .await;
2054
2055        assert_eq!(
2056            ParsedMarkdown {
2057                children: vec![ParsedMarkdownElement::Table(table(
2058                    0..150,
2059                    vec![row(vec![
2060                        column(
2061                            1,
2062                            1,
2063                            true,
2064                            text("Id", 0..150),
2065                            ParsedMarkdownTableAlignment::Center
2066                        ),
2067                        column(
2068                            1,
2069                            1,
2070                            true,
2071                            text("Name", 0..150),
2072                            ParsedMarkdownTableAlignment::Center
2073                        )
2074                    ])],
2075                    vec![],
2076                ))],
2077            },
2078            parsed
2079        );
2080    }
2081
2082    #[gpui::test]
2083    async fn test_html_heading_tags() {
2084        let parsed = parse("<h1>Heading</h1><h2>Heading</h2><h3>Heading</h3><h4>Heading</h4><h5>Heading</h5><h6>Heading</h6>").await;
2085
2086        assert_eq!(
2087            ParsedMarkdown {
2088                children: vec![
2089                    ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2090                        level: HeadingLevel::H1,
2091                        source_range: 0..96,
2092                        contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2093                            source_range: 0..96,
2094                            contents: "Heading".into(),
2095                            highlights: Vec::default(),
2096                            region_ranges: Vec::default(),
2097                            regions: Vec::default()
2098                        })],
2099                    }),
2100                    ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2101                        level: HeadingLevel::H2,
2102                        source_range: 0..96,
2103                        contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2104                            source_range: 0..96,
2105                            contents: "Heading".into(),
2106                            highlights: Vec::default(),
2107                            region_ranges: Vec::default(),
2108                            regions: Vec::default()
2109                        })],
2110                    }),
2111                    ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2112                        level: HeadingLevel::H3,
2113                        source_range: 0..96,
2114                        contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2115                            source_range: 0..96,
2116                            contents: "Heading".into(),
2117                            highlights: Vec::default(),
2118                            region_ranges: Vec::default(),
2119                            regions: Vec::default()
2120                        })],
2121                    }),
2122                    ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2123                        level: HeadingLevel::H4,
2124                        source_range: 0..96,
2125                        contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2126                            source_range: 0..96,
2127                            contents: "Heading".into(),
2128                            highlights: Vec::default(),
2129                            region_ranges: Vec::default(),
2130                            regions: Vec::default()
2131                        })],
2132                    }),
2133                    ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2134                        level: HeadingLevel::H5,
2135                        source_range: 0..96,
2136                        contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2137                            source_range: 0..96,
2138                            contents: "Heading".into(),
2139                            highlights: Vec::default(),
2140                            region_ranges: Vec::default(),
2141                            regions: Vec::default()
2142                        })],
2143                    }),
2144                    ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2145                        level: HeadingLevel::H6,
2146                        source_range: 0..96,
2147                        contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2148                            source_range: 0..96,
2149                            contents: "Heading".into(),
2150                            highlights: Vec::default(),
2151                            region_ranges: Vec::default(),
2152                            regions: Vec::default()
2153                        })],
2154                    }),
2155                ],
2156            },
2157            parsed
2158        );
2159    }
2160
2161    #[gpui::test]
2162    async fn test_html_image_tag() {
2163        let parsed = parse("<img src=\"http://example.com/foo.png\" />").await;
2164
2165        assert_eq!(
2166            ParsedMarkdown {
2167                children: vec![ParsedMarkdownElement::Image(Image {
2168                    source_range: 0..40,
2169                    link: Link::Web {
2170                        url: "http://example.com/foo.png".to_string(),
2171                    },
2172                    alt_text: None,
2173                    height: None,
2174                    width: None,
2175                })]
2176            },
2177            parsed
2178        );
2179    }
2180
2181    #[gpui::test]
2182    async fn test_html_image_tag_with_alt_text() {
2183        let parsed = parse("<img src=\"http://example.com/foo.png\" alt=\"Foo\" />").await;
2184
2185        assert_eq!(
2186            ParsedMarkdown {
2187                children: vec![ParsedMarkdownElement::Image(Image {
2188                    source_range: 0..50,
2189                    link: Link::Web {
2190                        url: "http://example.com/foo.png".to_string(),
2191                    },
2192                    alt_text: Some("Foo".into()),
2193                    height: None,
2194                    width: None,
2195                })]
2196            },
2197            parsed
2198        );
2199    }
2200
2201    #[gpui::test]
2202    async fn test_html_image_tag_with_height_and_width() {
2203        let parsed =
2204            parse("<img src=\"http://example.com/foo.png\" height=\"100\" width=\"200\" />").await;
2205
2206        assert_eq!(
2207            ParsedMarkdown {
2208                children: vec![ParsedMarkdownElement::Image(Image {
2209                    source_range: 0..65,
2210                    link: Link::Web {
2211                        url: "http://example.com/foo.png".to_string(),
2212                    },
2213                    alt_text: None,
2214                    height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
2215                    width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
2216                })]
2217            },
2218            parsed
2219        );
2220    }
2221
2222    #[gpui::test]
2223    async fn test_html_image_style_tag_with_height_and_width() {
2224        let parsed = parse(
2225            "<img src=\"http://example.com/foo.png\" style=\"height:100px; width:200px;\" />",
2226        )
2227        .await;
2228
2229        assert_eq!(
2230            ParsedMarkdown {
2231                children: vec![ParsedMarkdownElement::Image(Image {
2232                    source_range: 0..75,
2233                    link: Link::Web {
2234                        url: "http://example.com/foo.png".to_string(),
2235                    },
2236                    alt_text: None,
2237                    height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
2238                    width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
2239                })]
2240            },
2241            parsed
2242        );
2243    }
2244
2245    #[gpui::test]
2246    async fn test_header_only_table() {
2247        let markdown = "\
2248| Header 1 | Header 2 |
2249|----------|----------|
2250
2251Some other content
2252";
2253
2254        let expected_table = table(
2255            0..48,
2256            vec![row(vec![
2257                column(
2258                    1,
2259                    1,
2260                    true,
2261                    text("Header 1", 1..11),
2262                    ParsedMarkdownTableAlignment::None,
2263                ),
2264                column(
2265                    1,
2266                    1,
2267                    true,
2268                    text("Header 2", 12..22),
2269                    ParsedMarkdownTableAlignment::None,
2270                ),
2271            ])],
2272            vec![],
2273        );
2274
2275        assert_eq!(
2276            parse(markdown).await.children[0],
2277            ParsedMarkdownElement::Table(expected_table)
2278        );
2279    }
2280
2281    #[gpui::test]
2282    async fn test_basic_table() {
2283        let markdown = "\
2284| Header 1 | Header 2 |
2285|----------|----------|
2286| Cell 1   | Cell 2   |
2287| Cell 3   | Cell 4   |";
2288
2289        let expected_table = table(
2290            0..95,
2291            vec![row(vec![
2292                column(
2293                    1,
2294                    1,
2295                    true,
2296                    text("Header 1", 1..11),
2297                    ParsedMarkdownTableAlignment::None,
2298                ),
2299                column(
2300                    1,
2301                    1,
2302                    true,
2303                    text("Header 2", 12..22),
2304                    ParsedMarkdownTableAlignment::None,
2305                ),
2306            ])],
2307            vec![
2308                row(vec![
2309                    column(
2310                        1,
2311                        1,
2312                        false,
2313                        text("Cell 1", 49..59),
2314                        ParsedMarkdownTableAlignment::None,
2315                    ),
2316                    column(
2317                        1,
2318                        1,
2319                        false,
2320                        text("Cell 2", 60..70),
2321                        ParsedMarkdownTableAlignment::None,
2322                    ),
2323                ]),
2324                row(vec![
2325                    column(
2326                        1,
2327                        1,
2328                        false,
2329                        text("Cell 3", 73..83),
2330                        ParsedMarkdownTableAlignment::None,
2331                    ),
2332                    column(
2333                        1,
2334                        1,
2335                        false,
2336                        text("Cell 4", 84..94),
2337                        ParsedMarkdownTableAlignment::None,
2338                    ),
2339                ]),
2340            ],
2341        );
2342
2343        assert_eq!(
2344            parse(markdown).await.children[0],
2345            ParsedMarkdownElement::Table(expected_table)
2346        );
2347    }
2348
2349    #[gpui::test]
2350    async fn test_list_basic() {
2351        let parsed = parse(
2352            "\
2353* Item 1
2354* Item 2
2355* Item 3
2356",
2357        )
2358        .await;
2359
2360        assert_eq!(
2361            parsed.children,
2362            vec![
2363                list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
2364                list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
2365                list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
2366            ],
2367        );
2368    }
2369
2370    #[gpui::test]
2371    async fn test_list_with_tasks() {
2372        let parsed = parse(
2373            "\
2374- [ ] TODO
2375- [x] Checked
2376",
2377        )
2378        .await;
2379
2380        assert_eq!(
2381            parsed.children,
2382            vec![
2383                list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
2384                list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
2385            ],
2386        );
2387    }
2388
2389    #[gpui::test]
2390    async fn test_list_with_indented_task() {
2391        let parsed = parse(
2392            "\
2393- [ ] TODO
2394  - [x] Checked
2395  - Unordered
2396  1. Number 1
2397  1. Number 2
23981. Number A
2399",
2400        )
2401        .await;
2402
2403        assert_eq!(
2404            parsed.children,
2405            vec![
2406                list_item(0..12, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
2407                list_item(13..26, 2, Task(true, 15..18), vec![p("Checked", 19..26)]),
2408                list_item(29..40, 2, Unordered, vec![p("Unordered", 31..40)]),
2409                list_item(43..54, 2, Ordered(1), vec![p("Number 1", 46..54)]),
2410                list_item(57..68, 2, Ordered(2), vec![p("Number 2", 60..68)]),
2411                list_item(69..80, 1, Ordered(1), vec![p("Number A", 72..80)]),
2412            ],
2413        );
2414    }
2415
2416    #[gpui::test]
2417    async fn test_list_with_linebreak_is_handled_correctly() {
2418        let parsed = parse(
2419            "\
2420- [ ] Task 1
2421
2422- [x] Task 2
2423",
2424        )
2425        .await;
2426
2427        assert_eq!(
2428            parsed.children,
2429            vec![
2430                list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
2431                list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
2432            ],
2433        );
2434    }
2435
2436    #[gpui::test]
2437    async fn test_list_nested() {
2438        let parsed = parse(
2439            "\
2440* Item 1
2441* Item 2
2442* Item 3
2443
24441. Hello
24451. Two
2446   1. Three
24472. Four
24483. Five
2449
2450* First
2451  1. Hello
2452     1. Goodbyte
2453        - Inner
2454        - Inner
2455  2. Goodbyte
2456        - Next item empty
2457        -
2458* Last
2459",
2460        )
2461        .await;
2462
2463        assert_eq!(
2464            parsed.children,
2465            vec![
2466                list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
2467                list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
2468                list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
2469                list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
2470                list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
2471                list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
2472                list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
2473                list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
2474                list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
2475                list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
2476                list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
2477                list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
2478                list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
2479                list_item(143..159, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
2480                list_item(160..180, 3, Unordered, vec![p("Next item empty", 165..180)]),
2481                list_item(186..190, 3, Unordered, vec![]),
2482                list_item(191..197, 1, Unordered, vec![p("Last", 193..197)]),
2483            ]
2484        );
2485    }
2486
2487    #[gpui::test]
2488    async fn test_list_with_nested_content() {
2489        let parsed = parse(
2490            "\
2491*   This is a list item with two paragraphs.
2492
2493    This is the second paragraph in the list item.
2494",
2495        )
2496        .await;
2497
2498        assert_eq!(
2499            parsed.children,
2500            vec![list_item(
2501                0..96,
2502                1,
2503                Unordered,
2504                vec![
2505                    p("This is a list item with two paragraphs.", 4..44),
2506                    p("This is the second paragraph in the list item.", 50..97)
2507                ],
2508            ),],
2509        );
2510    }
2511
2512    #[gpui::test]
2513    async fn test_list_item_with_inline_html() {
2514        let parsed = parse(
2515            "\
2516*   This is a list item with an inline HTML <sometag>tag</sometag>.
2517",
2518        )
2519        .await;
2520
2521        assert_eq!(
2522            parsed.children,
2523            vec![list_item(
2524                0..67,
2525                1,
2526                Unordered,
2527                vec![p("This is a list item with an inline HTML tag.", 4..44),],
2528            ),],
2529        );
2530    }
2531
2532    #[gpui::test]
2533    async fn test_nested_list_with_paragraph_inside() {
2534        let parsed = parse(
2535            "\
25361. a
2537    1. b
2538        1. c
2539
2540    text
2541
2542    1. d
2543",
2544        )
2545        .await;
2546
2547        assert_eq!(
2548            parsed.children,
2549            vec![
2550                list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
2551                list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
2552                list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
2553                p("text", 32..37),
2554                list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
2555            ],
2556        );
2557    }
2558
2559    #[gpui::test]
2560    async fn test_list_with_leading_text() {
2561        let parsed = parse(
2562            "\
2563* `code`
2564* **bold**
2565* [link](https://example.com)
2566",
2567        )
2568        .await;
2569
2570        assert_eq!(
2571            parsed.children,
2572            vec![
2573                list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
2574                list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
2575                list_item(20..49, 1, Unordered, vec![p("link", 22..49)],),
2576            ],
2577        );
2578    }
2579
2580    #[gpui::test]
2581    async fn test_simple_block_quote() {
2582        let parsed = parse("> Simple block quote with **styled text**").await;
2583
2584        assert_eq!(
2585            parsed.children,
2586            vec![block_quote(
2587                vec![p("Simple block quote with styled text", 2..41)],
2588                0..41
2589            )]
2590        );
2591    }
2592
2593    #[gpui::test]
2594    async fn test_simple_block_quote_with_multiple_lines() {
2595        let parsed = parse(
2596            "\
2597> # Heading
2598> More
2599> text
2600>
2601> More text
2602",
2603        )
2604        .await;
2605
2606        assert_eq!(
2607            parsed.children,
2608            vec![block_quote(
2609                vec![
2610                    h1(text("Heading", 4..11), 2..12),
2611                    p("More text", 14..26),
2612                    p("More text", 30..40)
2613                ],
2614                0..40
2615            )]
2616        );
2617    }
2618
2619    #[gpui::test]
2620    async fn test_nested_block_quote() {
2621        let parsed = parse(
2622            "\
2623> A
2624>
2625> > # B
2626>
2627> C
2628
2629More text
2630",
2631        )
2632        .await;
2633
2634        assert_eq!(
2635            parsed.children,
2636            vec![
2637                block_quote(
2638                    vec![
2639                        p("A", 2..4),
2640                        block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
2641                        p("C", 18..20)
2642                    ],
2643                    0..20
2644                ),
2645                p("More text", 21..31)
2646            ]
2647        );
2648    }
2649
2650    #[gpui::test]
2651    async fn test_code_block() {
2652        let parsed = parse(
2653            "\
2654```
2655fn main() {
2656    return 0;
2657}
2658```
2659",
2660        )
2661        .await;
2662
2663        assert_eq!(
2664            parsed.children,
2665            vec![code_block(
2666                None,
2667                "fn main() {\n    return 0;\n}",
2668                0..35,
2669                None
2670            )]
2671        );
2672    }
2673
2674    #[gpui::test]
2675    async fn test_code_block_with_language(executor: BackgroundExecutor) {
2676        let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
2677        language_registry.add(rust_lang());
2678
2679        let parsed = parse_markdown(
2680            "\
2681```rust
2682fn main() {
2683    return 0;
2684}
2685```
2686",
2687            None,
2688            Some(language_registry),
2689        )
2690        .await;
2691
2692        assert_eq!(
2693            parsed.children,
2694            vec![code_block(
2695                Some("rust".to_string()),
2696                "fn main() {\n    return 0;\n}",
2697                0..39,
2698                Some(vec![])
2699            )]
2700        );
2701    }
2702
2703    fn rust_lang() -> Arc<Language> {
2704        Arc::new(Language::new(
2705            LanguageConfig {
2706                name: "Rust".into(),
2707                matcher: LanguageMatcher {
2708                    path_suffixes: vec!["rs".into()],
2709                    ..Default::default()
2710                },
2711                collapsed_placeholder: " /* ... */ ".to_string(),
2712                ..Default::default()
2713            },
2714            Some(tree_sitter_rust::LANGUAGE.into()),
2715        ))
2716    }
2717
2718    fn h1(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
2719        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2720            source_range,
2721            level: HeadingLevel::H1,
2722            contents,
2723        })
2724    }
2725
2726    fn h2(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
2727        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2728            source_range,
2729            level: HeadingLevel::H2,
2730            contents,
2731        })
2732    }
2733
2734    fn h3(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
2735        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2736            source_range,
2737            level: HeadingLevel::H3,
2738            contents,
2739        })
2740    }
2741
2742    fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
2743        ParsedMarkdownElement::Paragraph(text(contents, source_range))
2744    }
2745
2746    fn text(contents: &str, source_range: Range<usize>) -> MarkdownParagraph {
2747        vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2748            highlights: Vec::new(),
2749            region_ranges: Vec::new(),
2750            regions: Vec::new(),
2751            source_range,
2752            contents: contents.to_string().into(),
2753        })]
2754    }
2755
2756    fn block_quote(
2757        children: Vec<ParsedMarkdownElement>,
2758        source_range: Range<usize>,
2759    ) -> ParsedMarkdownElement {
2760        ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
2761            source_range,
2762            children,
2763        })
2764    }
2765
2766    fn code_block(
2767        language: Option<String>,
2768        code: &str,
2769        source_range: Range<usize>,
2770        highlights: Option<Vec<(Range<usize>, HighlightId)>>,
2771    ) -> ParsedMarkdownElement {
2772        ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
2773            source_range,
2774            language,
2775            contents: code.to_string().into(),
2776            highlights,
2777        })
2778    }
2779
2780    fn list_item(
2781        source_range: Range<usize>,
2782        depth: u16,
2783        item_type: ParsedMarkdownListItemType,
2784        content: Vec<ParsedMarkdownElement>,
2785    ) -> ParsedMarkdownElement {
2786        ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
2787            source_range,
2788            item_type,
2789            depth,
2790            content,
2791            nested: false,
2792        })
2793    }
2794
2795    fn nested_list_item(
2796        source_range: Range<usize>,
2797        depth: u16,
2798        item_type: ParsedMarkdownListItemType,
2799        content: Vec<ParsedMarkdownElement>,
2800    ) -> ParsedMarkdownElement {
2801        ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
2802            source_range,
2803            item_type,
2804            depth,
2805            content,
2806            nested: true,
2807        })
2808    }
2809
2810    fn table(
2811        source_range: Range<usize>,
2812        header: Vec<ParsedMarkdownTableRow>,
2813        body: Vec<ParsedMarkdownTableRow>,
2814    ) -> ParsedMarkdownTable {
2815        ParsedMarkdownTable {
2816            source_range,
2817            header,
2818            body,
2819        }
2820    }
2821
    /// Wraps the given columns in a `ParsedMarkdownTableRow`.
    fn row(columns: Vec<ParsedMarkdownTableColumn>) -> ParsedMarkdownTableRow {
        ParsedMarkdownTableRow { columns }
    }
2825
2826    fn column(
2827        col_span: usize,
2828        row_span: usize,
2829        is_header: bool,
2830        children: MarkdownParagraph,
2831        alignment: ParsedMarkdownTableAlignment,
2832    ) -> ParsedMarkdownTableColumn {
2833        ParsedMarkdownTableColumn {
2834            col_span,
2835            row_span,
2836            is_header,
2837            children,
2838            alignment,
2839        }
2840    }
2841
2842    impl PartialEq for ParsedMarkdownTable {
2843        fn eq(&self, other: &Self) -> bool {
2844            self.source_range == other.source_range
2845                && self.header == other.header
2846                && self.body == other.body
2847        }
2848    }
2849
2850    impl PartialEq for ParsedMarkdownText {
2851        fn eq(&self, other: &Self) -> bool {
2852            self.source_range == other.source_range && self.contents == other.contents
2853        }
2854    }
2855}