markdown_parser.rs

   1use crate::{
   2    markdown_elements::*,
   3    markdown_minifier::{Minifier, MinifierOptions},
   4};
   5use async_recursion::async_recursion;
   6use collections::FxHashMap;
   7use gpui::{DefiniteLength, FontWeight, px, relative};
   8use html5ever::{ParseOpts, local_name, parse_document, tendril::TendrilSink};
   9use language::LanguageRegistry;
  10use markup5ever_rcdom::RcDom;
  11use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
  12use std::{cell::RefCell, collections::HashMap, ops::Range, path::PathBuf, rc::Rc, sync::Arc, vec};
  13
  14pub async fn parse_markdown(
  15    markdown_input: &str,
  16    file_location_directory: Option<PathBuf>,
  17    language_registry: Option<Arc<LanguageRegistry>>,
  18) -> ParsedMarkdown {
  19    let mut options = Options::all();
  20    options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
  21
  22    let parser = Parser::new_ext(markdown_input, options);
  23    let parser = MarkdownParser::new(
  24        parser.into_offset_iter().collect(),
  25        file_location_directory,
  26        language_registry,
  27    );
  28    let renderer = parser.parse_document().await;
  29    ParsedMarkdown {
  30        children: renderer.parsed,
  31    }
  32}
  33
  34fn cleanup_html(source: &str) -> Vec<u8> {
  35    let mut writer = std::io::Cursor::new(Vec::new());
  36    let mut reader = std::io::Cursor::new(source);
  37    let mut minify = Minifier::new(
  38        &mut writer,
  39        MinifierOptions {
  40            omit_doctype: true,
  41            collapse_whitespace: true,
  42            ..Default::default()
  43        },
  44    );
  45    if let Ok(()) = minify.minify(&mut reader) {
  46        writer.into_inner()
  47    } else {
  48        source.bytes().collect()
  49    }
  50}
  51
  52struct MarkdownParser<'a> {
  53    tokens: Vec<(Event<'a>, Range<usize>)>,
  54    /// The current index in the tokens array
  55    cursor: usize,
  56    /// The blocks that we have successfully parsed so far
  57    parsed: Vec<ParsedMarkdownElement>,
  58    file_location_directory: Option<PathBuf>,
  59    language_registry: Option<Arc<LanguageRegistry>>,
  60}
  61
  62struct MarkdownListItem {
  63    content: Vec<ParsedMarkdownElement>,
  64    item_type: ParsedMarkdownListItemType,
  65}
  66
  67impl Default for MarkdownListItem {
  68    fn default() -> Self {
  69        Self {
  70            content: Vec::new(),
  71            item_type: ParsedMarkdownListItemType::Unordered,
  72        }
  73    }
  74}
  75
  76impl<'a> MarkdownParser<'a> {
  77    fn new(
  78        tokens: Vec<(Event<'a>, Range<usize>)>,
  79        file_location_directory: Option<PathBuf>,
  80        language_registry: Option<Arc<LanguageRegistry>>,
  81    ) -> Self {
  82        Self {
  83            tokens,
  84            file_location_directory,
  85            language_registry,
  86            cursor: 0,
  87            parsed: vec![],
  88        }
  89    }
  90
  91    fn eof(&self) -> bool {
  92        if self.tokens.is_empty() {
  93            return true;
  94        }
  95        self.cursor >= self.tokens.len() - 1
  96    }
  97
  98    fn peek(&self, steps: usize) -> Option<&(Event<'_>, Range<usize>)> {
  99        if self.eof() || (steps + self.cursor) >= self.tokens.len() {
 100            return self.tokens.last();
 101        }
 102        self.tokens.get(self.cursor + steps)
 103    }
 104
 105    fn previous(&self) -> Option<&(Event<'_>, Range<usize>)> {
 106        if self.cursor == 0 || self.cursor > self.tokens.len() {
 107            return None;
 108        }
 109        self.tokens.get(self.cursor - 1)
 110    }
 111
 112    fn current(&self) -> Option<&(Event<'_>, Range<usize>)> {
 113        self.peek(0)
 114    }
 115
 116    fn current_event(&self) -> Option<&Event<'_>> {
 117        self.current().map(|(event, _)| event)
 118    }
 119
 120    fn is_text_like(event: &Event) -> bool {
 121        match event {
 122            Event::Text(_)
 123            // Represent an inline code block
 124            | Event::Code(_)
 125            | Event::Html(_)
 126            | Event::InlineHtml(_)
 127            | Event::FootnoteReference(_)
 128            | Event::Start(Tag::Link { .. })
 129            | Event::Start(Tag::Emphasis)
 130            | Event::Start(Tag::Strong)
 131            | Event::Start(Tag::Strikethrough)
 132            | Event::Start(Tag::Image { .. }) => {
 133                true
 134            }
 135            _ => false,
 136        }
 137    }
 138
 139    async fn parse_document(mut self) -> Self {
 140        while !self.eof() {
 141            if let Some(block) = self.parse_block().await {
 142                self.parsed.extend(block);
 143            } else {
 144                self.cursor += 1;
 145            }
 146        }
 147        self
 148    }
 149
 150    #[async_recursion]
 151    async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
 152        let (current, source_range) = self.current().unwrap();
 153        let source_range = source_range.clone();
 154        match current {
 155            Event::Start(tag) => match tag {
 156                Tag::Paragraph => {
 157                    self.cursor += 1;
 158                    let text = self.parse_text(false, Some(source_range));
 159                    Some(vec![ParsedMarkdownElement::Paragraph(text)])
 160                }
 161                Tag::Heading { level, .. } => {
 162                    let level = *level;
 163                    self.cursor += 1;
 164                    let heading = self.parse_heading(level);
 165                    Some(vec![ParsedMarkdownElement::Heading(heading)])
 166                }
 167                Tag::Table(alignment) => {
 168                    let alignment = alignment.clone();
 169                    self.cursor += 1;
 170                    let table = self.parse_table(alignment);
 171                    Some(vec![ParsedMarkdownElement::Table(table)])
 172                }
 173                Tag::List(order) => {
 174                    let order = *order;
 175                    self.cursor += 1;
 176                    let list = self.parse_list(order).await;
 177                    Some(list)
 178                }
 179                Tag::BlockQuote(_kind) => {
 180                    self.cursor += 1;
 181                    let block_quote = self.parse_block_quote().await;
 182                    Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
 183                }
 184                Tag::CodeBlock(kind) => {
 185                    let language = match kind {
 186                        pulldown_cmark::CodeBlockKind::Indented => None,
 187                        pulldown_cmark::CodeBlockKind::Fenced(language) => {
 188                            if language.is_empty() {
 189                                None
 190                            } else {
 191                                Some(language.to_string())
 192                            }
 193                        }
 194                    };
 195
 196                    self.cursor += 1;
 197
 198                    let code_block = self.parse_code_block(language).await?;
 199                    Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
 200                }
 201                Tag::HtmlBlock => {
 202                    self.cursor += 1;
 203
 204                    Some(self.parse_html_block().await)
 205                }
 206                _ => None,
 207            },
 208            Event::Rule => {
 209                self.cursor += 1;
 210                Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
 211            }
 212            _ => None,
 213        }
 214    }
 215
 216    fn parse_text(
 217        &mut self,
 218        should_complete_on_soft_break: bool,
 219        source_range: Option<Range<usize>>,
 220    ) -> MarkdownParagraph {
 221        let source_range = source_range.unwrap_or_else(|| {
 222            self.current()
 223                .map(|(_, range)| range.clone())
 224                .unwrap_or_default()
 225        });
 226
 227        let mut markdown_text_like = Vec::new();
 228        let mut text = String::new();
 229        let mut bold_depth = 0;
 230        let mut italic_depth = 0;
 231        let mut strikethrough_depth = 0;
 232        let mut link: Option<Link> = None;
 233        let mut image: Option<Image> = None;
 234        let mut region_ranges: Vec<Range<usize>> = vec![];
 235        let mut regions: Vec<ParsedRegion> = vec![];
 236        let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
 237        let mut link_urls: Vec<String> = vec![];
 238        let mut link_ranges: Vec<Range<usize>> = vec![];
 239
 240        loop {
 241            if self.eof() {
 242                break;
 243            }
 244
 245            let (current, _) = self.current().unwrap();
 246            let prev_len = text.len();
 247            match current {
 248                Event::SoftBreak => {
 249                    if should_complete_on_soft_break {
 250                        break;
 251                    }
 252                    text.push(' ');
 253                }
 254
 255                Event::HardBreak => {
 256                    text.push('\n');
 257                }
 258
 259                // We want to ignore any inline HTML tags in the text but keep
 260                // the text between them
 261                Event::InlineHtml(_) => {}
 262
 263                Event::Text(t) => {
 264                    text.push_str(t.as_ref());
 265                    let mut style = MarkdownHighlightStyle::default();
 266
 267                    if bold_depth > 0 {
 268                        style.weight = FontWeight::BOLD;
 269                    }
 270
 271                    if italic_depth > 0 {
 272                        style.italic = true;
 273                    }
 274
 275                    if strikethrough_depth > 0 {
 276                        style.strikethrough = true;
 277                    }
 278
 279                    let last_run_len = if let Some(link) = link.clone() {
 280                        region_ranges.push(prev_len..text.len());
 281                        regions.push(ParsedRegion {
 282                            code: false,
 283                            link: Some(link),
 284                        });
 285                        style.link = true;
 286                        prev_len
 287                    } else {
 288                        // Manually scan for links
 289                        let mut finder = linkify::LinkFinder::new();
 290                        finder.kinds(&[linkify::LinkKind::Url]);
 291                        let mut last_link_len = prev_len;
 292                        for link in finder.links(t) {
 293                            let start = link.start();
 294                            let end = link.end();
 295                            let range = (prev_len + start)..(prev_len + end);
 296                            link_ranges.push(range.clone());
 297                            link_urls.push(link.as_str().to_string());
 298
 299                            // If there is a style before we match a link, we have to add this to the highlighted ranges
 300                            if style != MarkdownHighlightStyle::default()
 301                                && last_link_len < link.start()
 302                            {
 303                                highlights.push((
 304                                    last_link_len..link.start(),
 305                                    MarkdownHighlight::Style(style.clone()),
 306                                ));
 307                            }
 308
 309                            highlights.push((
 310                                range.clone(),
 311                                MarkdownHighlight::Style(MarkdownHighlightStyle {
 312                                    underline: true,
 313                                    ..style
 314                                }),
 315                            ));
 316                            region_ranges.push(range.clone());
 317                            regions.push(ParsedRegion {
 318                                code: false,
 319                                link: Some(Link::Web {
 320                                    url: link.as_str().to_string(),
 321                                }),
 322                            });
 323                            last_link_len = end;
 324                        }
 325                        last_link_len
 326                    };
 327
 328                    if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
 329                        let mut new_highlight = true;
 330                        if let Some((last_range, last_style)) = highlights.last_mut()
 331                            && last_range.end == last_run_len
 332                            && last_style == &MarkdownHighlight::Style(style.clone())
 333                        {
 334                            last_range.end = text.len();
 335                            new_highlight = false;
 336                        }
 337                        if new_highlight {
 338                            highlights.push((
 339                                last_run_len..text.len(),
 340                                MarkdownHighlight::Style(style.clone()),
 341                            ));
 342                        }
 343                    }
 344                }
 345                Event::Code(t) => {
 346                    text.push_str(t.as_ref());
 347                    region_ranges.push(prev_len..text.len());
 348
 349                    if link.is_some() {
 350                        highlights.push((
 351                            prev_len..text.len(),
 352                            MarkdownHighlight::Style(MarkdownHighlightStyle {
 353                                link: true,
 354                                ..Default::default()
 355                            }),
 356                        ));
 357                    }
 358                    regions.push(ParsedRegion {
 359                        code: true,
 360                        link: link.clone(),
 361                    });
 362                }
 363                Event::Start(tag) => match tag {
 364                    Tag::Emphasis => italic_depth += 1,
 365                    Tag::Strong => bold_depth += 1,
 366                    Tag::Strikethrough => strikethrough_depth += 1,
 367                    Tag::Link { dest_url, .. } => {
 368                        link = Link::identify(
 369                            self.file_location_directory.clone(),
 370                            dest_url.to_string(),
 371                        );
 372                    }
 373                    Tag::Image { dest_url, .. } => {
 374                        if !text.is_empty() {
 375                            let parsed_regions = MarkdownParagraphChunk::Text(ParsedMarkdownText {
 376                                source_range: source_range.clone(),
 377                                contents: text.clone(),
 378                                highlights: highlights.clone(),
 379                                region_ranges: region_ranges.clone(),
 380                                regions: regions.clone(),
 381                            });
 382                            text = String::new();
 383                            highlights = vec![];
 384                            region_ranges = vec![];
 385                            regions = vec![];
 386                            markdown_text_like.push(parsed_regions);
 387                        }
 388                        image = Image::identify(
 389                            dest_url.to_string(),
 390                            source_range.clone(),
 391                            self.file_location_directory.clone(),
 392                        );
 393                    }
 394                    _ => {
 395                        break;
 396                    }
 397                },
 398
 399                Event::End(tag) => match tag {
 400                    TagEnd::Emphasis => italic_depth -= 1,
 401                    TagEnd::Strong => bold_depth -= 1,
 402                    TagEnd::Strikethrough => strikethrough_depth -= 1,
 403                    TagEnd::Link => {
 404                        link = None;
 405                    }
 406                    TagEnd::Image => {
 407                        if let Some(mut image) = image.take() {
 408                            if !text.is_empty() {
 409                                image.set_alt_text(std::mem::take(&mut text).into());
 410                            }
 411                            markdown_text_like.push(MarkdownParagraphChunk::Image(image));
 412                        }
 413                    }
 414                    TagEnd::Paragraph => {
 415                        self.cursor += 1;
 416                        break;
 417                    }
 418                    _ => {
 419                        break;
 420                    }
 421                },
 422                _ => {
 423                    break;
 424                }
 425            }
 426
 427            self.cursor += 1;
 428        }
 429        if !text.is_empty() {
 430            markdown_text_like.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
 431                source_range,
 432                contents: text,
 433                highlights,
 434                regions,
 435                region_ranges,
 436            }));
 437        }
 438        markdown_text_like
 439    }
 440
 441    fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
 442        let (_event, source_range) = self.previous().unwrap();
 443        let source_range = source_range.clone();
 444        let text = self.parse_text(true, None);
 445
 446        // Advance past the heading end tag
 447        self.cursor += 1;
 448
 449        ParsedMarkdownHeading {
 450            source_range,
 451            level: match level {
 452                pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
 453                pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
 454                pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
 455                pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
 456                pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
 457                pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
 458            },
 459            contents: text,
 460        }
 461    }
 462
 463    fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
 464        let (_event, source_range) = self.previous().unwrap();
 465        let source_range = source_range.clone();
 466        let mut header = ParsedMarkdownTableRow::new();
 467        let mut body = vec![];
 468        let mut current_row = vec![];
 469        let mut in_header = true;
 470        let column_alignments = alignment.iter().map(Self::convert_alignment).collect();
 471
 472        loop {
 473            if self.eof() {
 474                break;
 475            }
 476
 477            let (current, source_range) = self.current().unwrap();
 478            let source_range = source_range.clone();
 479            match current {
 480                Event::Start(Tag::TableHead)
 481                | Event::Start(Tag::TableRow)
 482                | Event::End(TagEnd::TableCell) => {
 483                    self.cursor += 1;
 484                }
 485                Event::Start(Tag::TableCell) => {
 486                    self.cursor += 1;
 487                    let cell_contents = self.parse_text(false, Some(source_range));
 488                    current_row.push(cell_contents);
 489                }
 490                Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
 491                    self.cursor += 1;
 492                    let new_row = std::mem::take(&mut current_row);
 493                    if in_header {
 494                        header.children = new_row;
 495                        in_header = false;
 496                    } else {
 497                        let row = ParsedMarkdownTableRow::with_children(new_row);
 498                        body.push(row);
 499                    }
 500                }
 501                Event::End(TagEnd::Table) => {
 502                    self.cursor += 1;
 503                    break;
 504                }
 505                _ => {
 506                    break;
 507                }
 508            }
 509        }
 510
 511        ParsedMarkdownTable {
 512            source_range,
 513            header,
 514            body,
 515            column_alignments,
 516        }
 517    }
 518
 519    fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
 520        match alignment {
 521            Alignment::None => ParsedMarkdownTableAlignment::None,
 522            Alignment::Left => ParsedMarkdownTableAlignment::Left,
 523            Alignment::Center => ParsedMarkdownTableAlignment::Center,
 524            Alignment::Right => ParsedMarkdownTableAlignment::Right,
 525        }
 526    }
 527
 528    async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
 529        let (_, list_source_range) = self.previous().unwrap();
 530
 531        let mut items = Vec::new();
 532        let mut items_stack = vec![MarkdownListItem::default()];
 533        let mut depth = 1;
 534        let mut order = order;
 535        let mut order_stack = Vec::new();
 536
 537        let mut insertion_indices = FxHashMap::default();
 538        let mut source_ranges = FxHashMap::default();
 539        let mut start_item_range = list_source_range.clone();
 540
 541        while !self.eof() {
 542            let (current, source_range) = self.current().unwrap();
 543            match current {
 544                Event::Start(Tag::List(new_order)) => {
 545                    if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
 546                        insertion_indices.insert(depth, items.len());
 547                    }
 548
 549                    // We will use the start of the nested list as the end for the current item's range,
 550                    // because we don't care about the hierarchy of list items
 551                    if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
 552                        e.insert(start_item_range.start..source_range.start);
 553                    }
 554
 555                    order_stack.push(order);
 556                    order = *new_order;
 557                    self.cursor += 1;
 558                    depth += 1;
 559                }
 560                Event::End(TagEnd::List(_)) => {
 561                    order = order_stack.pop().flatten();
 562                    self.cursor += 1;
 563                    depth -= 1;
 564
 565                    if depth == 0 {
 566                        break;
 567                    }
 568                }
 569                Event::Start(Tag::Item) => {
 570                    start_item_range = source_range.clone();
 571
 572                    self.cursor += 1;
 573                    items_stack.push(MarkdownListItem::default());
 574
 575                    let mut task_list = None;
 576                    // Check for task list marker (`- [ ]` or `- [x]`)
 577                    if let Some(event) = self.current_event() {
 578                        // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
 579                        if event == &Event::Start(Tag::Paragraph) {
 580                            self.cursor += 1;
 581                        }
 582
 583                        if let Some((Event::TaskListMarker(checked), range)) = self.current() {
 584                            task_list = Some((*checked, range.clone()));
 585                            self.cursor += 1;
 586                        }
 587                    }
 588
 589                    if let Some((event, range)) = self.current() {
 590                        // This is a plain list item.
 591                        // For example `- some text` or `1. [Docs](./docs.md)`
 592                        if MarkdownParser::is_text_like(event) {
 593                            let text = self.parse_text(false, Some(range.clone()));
 594                            let block = ParsedMarkdownElement::Paragraph(text);
 595                            if let Some(content) = items_stack.last_mut() {
 596                                let item_type = if let Some((checked, range)) = task_list {
 597                                    ParsedMarkdownListItemType::Task(checked, range)
 598                                } else if let Some(order) = order {
 599                                    ParsedMarkdownListItemType::Ordered(order)
 600                                } else {
 601                                    ParsedMarkdownListItemType::Unordered
 602                                };
 603                                content.item_type = item_type;
 604                                content.content.push(block);
 605                            }
 606                        } else {
 607                            let block = self.parse_block().await;
 608                            if let Some(block) = block
 609                                && let Some(list_item) = items_stack.last_mut()
 610                            {
 611                                list_item.content.extend(block);
 612                            }
 613                        }
 614                    }
 615
 616                    // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
 617                    if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
 618                        self.cursor += 1;
 619                    }
 620                }
 621                Event::End(TagEnd::Item) => {
 622                    self.cursor += 1;
 623
 624                    if let Some(current) = order {
 625                        order = Some(current + 1);
 626                    }
 627
 628                    if let Some(list_item) = items_stack.pop() {
 629                        let source_range = source_ranges
 630                            .remove(&depth)
 631                            .unwrap_or(start_item_range.clone());
 632
 633                        // We need to remove the last character of the source range, because it includes the newline character
 634                        let source_range = source_range.start..source_range.end - 1;
 635                        let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
 636                            source_range,
 637                            content: list_item.content,
 638                            depth,
 639                            item_type: list_item.item_type,
 640                        });
 641
 642                        if let Some(index) = insertion_indices.get(&depth) {
 643                            items.insert(*index, item);
 644                            insertion_indices.remove(&depth);
 645                        } else {
 646                            items.push(item);
 647                        }
 648                    }
 649                }
 650                _ => {
 651                    if depth == 0 {
 652                        break;
 653                    }
 654                    // This can only happen if a list item starts with more then one paragraph,
 655                    // or the list item contains blocks that should be rendered after the nested list items
 656                    let block = self.parse_block().await;
 657                    if let Some(block) = block {
 658                        if let Some(list_item) = items_stack.last_mut() {
 659                            // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
 660                            if !insertion_indices.contains_key(&depth) {
 661                                list_item.content.extend(block);
 662                                continue;
 663                            }
 664                        }
 665
 666                        // Otherwise we need to insert the block after all the nested items
 667                        // that have been parsed so far
 668                        items.extend(block);
 669                    } else {
 670                        self.cursor += 1;
 671                    }
 672                }
 673            }
 674        }
 675
 676        items
 677    }
 678
 679    #[async_recursion]
 680    async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
 681        let (_event, source_range) = self.previous().unwrap();
 682        let source_range = source_range.clone();
 683        let mut nested_depth = 1;
 684
 685        let mut children: Vec<ParsedMarkdownElement> = vec![];
 686
 687        while !self.eof() {
 688            let block = self.parse_block().await;
 689
 690            if let Some(block) = block {
 691                children.extend(block);
 692            } else {
 693                break;
 694            }
 695
 696            if self.eof() {
 697                break;
 698            }
 699
 700            let (current, _source_range) = self.current().unwrap();
 701            match current {
 702                // This is a nested block quote.
 703                // Record that we're in a nested block quote and continue parsing.
 704                // We don't need to advance the cursor since the next
 705                // call to `parse_block` will handle it.
 706                Event::Start(Tag::BlockQuote(_kind)) => {
 707                    nested_depth += 1;
 708                }
 709                Event::End(TagEnd::BlockQuote(_kind)) => {
 710                    nested_depth -= 1;
 711                    if nested_depth == 0 {
 712                        self.cursor += 1;
 713                        break;
 714                    }
 715                }
 716                _ => {}
 717            };
 718        }
 719
 720        ParsedMarkdownBlockQuote {
 721            source_range,
 722            children,
 723        }
 724    }
 725
 726    async fn parse_code_block(
 727        &mut self,
 728        language: Option<String>,
 729    ) -> Option<ParsedMarkdownCodeBlock> {
 730        let Some((_event, source_range)) = self.previous() else {
 731            return None;
 732        };
 733
 734        let source_range = source_range.clone();
 735        let mut code = String::new();
 736
 737        while !self.eof() {
 738            let Some((current, _source_range)) = self.current() else {
 739                break;
 740            };
 741
 742            match current {
 743                Event::Text(text) => {
 744                    code.push_str(text);
 745                    self.cursor += 1;
 746                }
 747                Event::End(TagEnd::CodeBlock) => {
 748                    self.cursor += 1;
 749                    break;
 750                }
 751                _ => {
 752                    break;
 753                }
 754            }
 755        }
 756
 757        code = code.strip_suffix('\n').unwrap_or(&code).to_string();
 758
 759        let highlights = if let Some(language) = &language {
 760            if let Some(registry) = &self.language_registry {
 761                let rope: language::Rope = code.as_str().into();
 762                registry
 763                    .language_for_name_or_extension(language)
 764                    .await
 765                    .map(|l| l.highlight_text(&rope, 0..code.len()))
 766                    .ok()
 767            } else {
 768                None
 769            }
 770        } else {
 771            None
 772        };
 773
 774        Some(ParsedMarkdownCodeBlock {
 775            source_range,
 776            contents: code.into(),
 777            language,
 778            highlights,
 779        })
 780    }
 781
 782    async fn parse_html_block(&mut self) -> Vec<ParsedMarkdownElement> {
 783        let mut elements = Vec::new();
 784        let Some((_event, _source_range)) = self.previous() else {
 785            return elements;
 786        };
 787
 788        let mut html_source_range_start = None;
 789        let mut html_source_range_end = None;
 790        let mut html_buffer = String::new();
 791
 792        while !self.eof() {
 793            let Some((current, source_range)) = self.current() else {
 794                break;
 795            };
 796            let source_range = source_range.clone();
 797            match current {
 798                Event::Html(html) => {
 799                    html_source_range_start.get_or_insert(source_range.start);
 800                    html_source_range_end = Some(source_range.end);
 801                    html_buffer.push_str(html);
 802                    self.cursor += 1;
 803                }
 804                Event::End(TagEnd::CodeBlock) => {
 805                    self.cursor += 1;
 806                    break;
 807                }
 808                _ => {
 809                    break;
 810                }
 811            }
 812        }
 813
 814        let bytes = cleanup_html(&html_buffer);
 815
 816        let mut cursor = std::io::Cursor::new(bytes);
 817        if let Ok(dom) = parse_document(RcDom::default(), ParseOpts::default())
 818            .from_utf8()
 819            .read_from(&mut cursor)
 820            && let Some((start, end)) = html_source_range_start.zip(html_source_range_end)
 821        {
 822            self.parse_html_node(start..end, &dom.document, &mut elements);
 823        }
 824
 825        elements
 826    }
 827
 828    fn parse_html_node(
 829        &self,
 830        source_range: Range<usize>,
 831        node: &Rc<markup5ever_rcdom::Node>,
 832        elements: &mut Vec<ParsedMarkdownElement>,
 833    ) {
 834        match &node.data {
 835            markup5ever_rcdom::NodeData::Document => {
 836                self.consume_children(source_range, node, elements);
 837            }
 838            markup5ever_rcdom::NodeData::Doctype { .. } => {}
 839            markup5ever_rcdom::NodeData::Text { contents } => {
 840                elements.push(ParsedMarkdownElement::Paragraph(vec![
 841                    MarkdownParagraphChunk::Text(ParsedMarkdownText {
 842                        source_range,
 843                        contents: contents.borrow().to_string(),
 844                        highlights: Vec::default(),
 845                        region_ranges: Vec::default(),
 846                        regions: Vec::default(),
 847                    }),
 848                ]));
 849            }
 850            markup5ever_rcdom::NodeData::Comment { .. } => {}
 851            markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
 852                if local_name!("img") == name.local {
 853                    if let Some(image) = self.extract_image(source_range, attrs) {
 854                        elements.push(ParsedMarkdownElement::Image(image));
 855                    }
 856                } else if matches!(
 857                    name.local,
 858                    local_name!("h1")
 859                        | local_name!("h2")
 860                        | local_name!("h3")
 861                        | local_name!("h4")
 862                        | local_name!("h5")
 863                        | local_name!("h6")
 864                ) {
 865                    let mut paragraph = MarkdownParagraph::new();
 866                    self.consume_paragraph(source_range.clone(), node, &mut paragraph);
 867
 868                    if !paragraph.is_empty() {
 869                        elements.push(ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
 870                            source_range,
 871                            level: match name.local {
 872                                local_name!("h1") => HeadingLevel::H1,
 873                                local_name!("h2") => HeadingLevel::H2,
 874                                local_name!("h3") => HeadingLevel::H3,
 875                                local_name!("h4") => HeadingLevel::H4,
 876                                local_name!("h5") => HeadingLevel::H5,
 877                                local_name!("h6") => HeadingLevel::H6,
 878                                _ => unreachable!(),
 879                            },
 880                            contents: paragraph,
 881                        }));
 882                    }
 883                } else if local_name!("blockquote") == name.local {
 884                    if let Some(blockquote) = self.extract_html_blockquote(node, source_range) {
 885                        elements.push(ParsedMarkdownElement::BlockQuote(blockquote));
 886                    }
 887                } else if local_name!("table") == name.local {
 888                    if let Some(table) = self.extract_html_table(node, source_range) {
 889                        elements.push(ParsedMarkdownElement::Table(table));
 890                    }
 891                } else {
 892                    self.consume_children(source_range, node, elements);
 893                }
 894            }
 895            markup5ever_rcdom::NodeData::ProcessingInstruction { .. } => {}
 896        }
 897    }
 898
 899    fn parse_paragraph(
 900        &self,
 901        source_range: Range<usize>,
 902        node: &Rc<markup5ever_rcdom::Node>,
 903        paragraph: &mut MarkdownParagraph,
 904    ) {
 905        match &node.data {
 906            markup5ever_rcdom::NodeData::Text { contents } => {
 907                paragraph.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
 908                    source_range,
 909                    regions: Vec::default(),
 910                    contents: contents.borrow().to_string(),
 911                    region_ranges: Vec::default(),
 912                    highlights: Vec::default(),
 913                }));
 914            }
 915            markup5ever_rcdom::NodeData::Element { .. } => {
 916                self.consume_paragraph(source_range, node, paragraph);
 917            }
 918            _ => {}
 919        }
 920    }
 921
 922    fn consume_paragraph(
 923        &self,
 924        source_range: Range<usize>,
 925        node: &Rc<markup5ever_rcdom::Node>,
 926        paragraph: &mut MarkdownParagraph,
 927    ) {
 928        for node in node.children.borrow().iter() {
 929            self.parse_paragraph(source_range.clone(), node, paragraph);
 930        }
 931    }
 932
 933    fn consume_children(
 934        &self,
 935        source_range: Range<usize>,
 936        node: &Rc<markup5ever_rcdom::Node>,
 937        elements: &mut Vec<ParsedMarkdownElement>,
 938    ) {
 939        for node in node.children.borrow().iter() {
 940            self.parse_html_node(source_range.clone(), node, elements);
 941        }
 942    }
 943
 944    fn attr_value(
 945        attrs: &RefCell<Vec<html5ever::Attribute>>,
 946        name: html5ever::LocalName,
 947    ) -> Option<String> {
 948        attrs.borrow().iter().find_map(|attr| {
 949            if attr.name.local == name {
 950                Some(attr.value.to_string())
 951            } else {
 952                None
 953            }
 954        })
 955    }
 956
 957    fn extract_styles_from_attributes(
 958        attrs: &RefCell<Vec<html5ever::Attribute>>,
 959    ) -> HashMap<String, String> {
 960        let mut styles = HashMap::new();
 961
 962        if let Some(style) = Self::attr_value(attrs, local_name!("style")) {
 963            for decl in style.split(';') {
 964                let mut parts = decl.splitn(2, ':');
 965                if let Some((key, value)) = parts.next().zip(parts.next()) {
 966                    styles.insert(
 967                        key.trim().to_lowercase().to_string(),
 968                        value.trim().to_string(),
 969                    );
 970                }
 971            }
 972        }
 973
 974        styles
 975    }
 976
 977    fn extract_image(
 978        &self,
 979        source_range: Range<usize>,
 980        attrs: &RefCell<Vec<html5ever::Attribute>>,
 981    ) -> Option<Image> {
 982        let src = Self::attr_value(attrs, local_name!("src"))?;
 983
 984        let mut image = Image::identify(src, source_range, self.file_location_directory.clone())?;
 985
 986        if let Some(alt) = Self::attr_value(attrs, local_name!("alt")) {
 987            image.set_alt_text(alt.into());
 988        }
 989
 990        let styles = Self::extract_styles_from_attributes(attrs);
 991
 992        if let Some(width) = Self::attr_value(attrs, local_name!("width"))
 993            .or_else(|| styles.get("width").cloned())
 994            .and_then(|width| Self::parse_length(&width))
 995        {
 996            image.set_width(width);
 997        }
 998
 999        if let Some(height) = Self::attr_value(attrs, local_name!("height"))
1000            .or_else(|| styles.get("height").cloned())
1001            .and_then(|height| Self::parse_length(&height))
1002        {
1003            image.set_height(height);
1004        }
1005
1006        Some(image)
1007    }
1008
1009    fn extract_html_blockquote(
1010        &self,
1011        node: &Rc<markup5ever_rcdom::Node>,
1012        source_range: Range<usize>,
1013    ) -> Option<ParsedMarkdownBlockQuote> {
1014        let mut children = Vec::new();
1015        self.consume_children(source_range.clone(), node, &mut children);
1016
1017        if children.is_empty() {
1018            None
1019        } else {
1020            Some(ParsedMarkdownBlockQuote {
1021                children,
1022                source_range,
1023            })
1024        }
1025    }
1026
1027    fn extract_html_table(
1028        &self,
1029        node: &Rc<markup5ever_rcdom::Node>,
1030        source_range: Range<usize>,
1031    ) -> Option<ParsedMarkdownTable> {
1032        let mut header_columns = Vec::new();
1033        let mut body_rows = Vec::new();
1034
1035        // node should be a thead or tbody element
1036        for node in node.children.borrow().iter() {
1037            match &node.data {
1038                markup5ever_rcdom::NodeData::Element { name, .. } => {
1039                    if local_name!("thead") == name.local {
1040                        // node should be a tr element
1041                        for node in node.children.borrow().iter() {
1042                            let mut paragraph = MarkdownParagraph::new();
1043                            self.consume_paragraph(source_range.clone(), node, &mut paragraph);
1044
1045                            for paragraph in paragraph.into_iter() {
1046                                header_columns.push(vec![paragraph]);
1047                            }
1048                        }
1049                    } else if local_name!("tbody") == name.local {
1050                        // node should be a tr element
1051                        for node in node.children.borrow().iter() {
1052                            let mut row = MarkdownParagraph::new();
1053                            self.consume_paragraph(source_range.clone(), node, &mut row);
1054                            body_rows.push(ParsedMarkdownTableRow::with_children(
1055                                row.into_iter().map(|column| vec![column]).collect(),
1056                            ));
1057                        }
1058                    }
1059                }
1060                _ => {}
1061            }
1062        }
1063
1064        if !header_columns.is_empty() || !body_rows.is_empty() {
1065            Some(ParsedMarkdownTable {
1066                source_range,
1067                body: body_rows,
1068                column_alignments: Vec::default(),
1069                header: ParsedMarkdownTableRow::with_children(header_columns),
1070            })
1071        } else {
1072            None
1073        }
1074    }
1075
1076    /// Parses the width/height attribute value of an html element (e.g. img element)
1077    fn parse_length(value: &str) -> Option<DefiniteLength> {
1078        if value.ends_with("%") {
1079            value
1080                .trim_end_matches("%")
1081                .parse::<f32>()
1082                .ok()
1083                .map(|value| relative(value / 100.))
1084        } else {
1085            value
1086                .trim_end_matches("px")
1087                .parse()
1088                .ok()
1089                .map(|value| px(value).into())
1090        }
1091    }
1092}
1093
1094#[cfg(test)]
1095mod tests {
1096    use super::*;
1097    use ParsedMarkdownListItemType::*;
1098    use core::panic;
1099    use gpui::{AbsoluteLength, BackgroundExecutor, DefiniteLength};
1100    use language::{
1101        HighlightId, Language, LanguageConfig, LanguageMatcher, LanguageRegistry, tree_sitter_rust,
1102    };
1103    use pretty_assertions::assert_eq;
1104
1105    async fn parse(input: &str) -> ParsedMarkdown {
1106        parse_markdown(input, None, None).await
1107    }
1108
1109    #[gpui::test]
1110    async fn test_headings() {
1111        let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
1112
1113        assert_eq!(
1114            parsed.children,
1115            vec![
1116                h1(text("Heading one", 2..13), 0..14),
1117                h2(text("Heading two", 17..28), 14..29),
1118                h3(text("Heading three", 33..46), 29..46),
1119            ]
1120        );
1121    }
1122
1123    #[gpui::test]
1124    async fn test_newlines_dont_new_paragraphs() {
1125        let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
1126
1127        assert_eq!(
1128            parsed.children,
1129            vec![p("Some text that is bolded and italicized", 0..46)]
1130        );
1131    }
1132
1133    #[gpui::test]
1134    async fn test_heading_with_paragraph() {
1135        let parsed = parse("# Zed\nThe editor").await;
1136
1137        assert_eq!(
1138            parsed.children,
1139            vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
1140        );
1141    }
1142
1143    #[gpui::test]
1144    async fn test_double_newlines_do_new_paragraphs() {
1145        let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
1146
1147        assert_eq!(
1148            parsed.children,
1149            vec![
1150                p("Some text that is bolded", 0..29),
1151                p("and italicized", 31..47),
1152            ]
1153        );
1154    }
1155
1156    #[gpui::test]
1157    async fn test_bold_italic_text() {
1158        let parsed = parse("Some text **that is bolded** and *italicized*").await;
1159
1160        assert_eq!(
1161            parsed.children,
1162            vec![p("Some text that is bolded and italicized", 0..45)]
1163        );
1164    }
1165
1166    #[gpui::test]
1167    async fn test_nested_bold_strikethrough_text() {
1168        let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
1169
1170        assert_eq!(parsed.children.len(), 1);
1171        assert_eq!(
1172            parsed.children[0],
1173            ParsedMarkdownElement::Paragraph(vec![MarkdownParagraphChunk::Text(
1174                ParsedMarkdownText {
1175                    source_range: 0..35,
1176                    contents: "Some bostrikethroughld text".to_string(),
1177                    highlights: Vec::new(),
1178                    region_ranges: Vec::new(),
1179                    regions: Vec::new(),
1180                }
1181            )])
1182        );
1183
1184        let new_text = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1185            text
1186        } else {
1187            panic!("Expected a paragraph");
1188        };
1189
1190        let paragraph = if let MarkdownParagraphChunk::Text(text) = &new_text[0] {
1191            text
1192        } else {
1193            panic!("Expected a text");
1194        };
1195
1196        assert_eq!(
1197            paragraph.highlights,
1198            vec![
1199                (
1200                    5..7,
1201                    MarkdownHighlight::Style(MarkdownHighlightStyle {
1202                        weight: FontWeight::BOLD,
1203                        ..Default::default()
1204                    }),
1205                ),
1206                (
1207                    7..20,
1208                    MarkdownHighlight::Style(MarkdownHighlightStyle {
1209                        weight: FontWeight::BOLD,
1210                        strikethrough: true,
1211                        ..Default::default()
1212                    }),
1213                ),
1214                (
1215                    20..22,
1216                    MarkdownHighlight::Style(MarkdownHighlightStyle {
1217                        weight: FontWeight::BOLD,
1218                        ..Default::default()
1219                    }),
1220                ),
1221            ]
1222        );
1223    }
1224
1225    #[gpui::test]
1226    async fn test_text_with_inline_html() {
1227        let parsed = parse("This is a paragraph with an inline HTML <sometag>tag</sometag>.").await;
1228
1229        assert_eq!(
1230            parsed.children,
1231            vec![p("This is a paragraph with an inline HTML tag.", 0..63),],
1232        );
1233    }
1234
1235    #[gpui::test]
1236    async fn test_raw_links_detection() {
1237        let parsed = parse("Checkout this https://zed.dev link").await;
1238
1239        assert_eq!(
1240            parsed.children,
1241            vec![p("Checkout this https://zed.dev link", 0..34)]
1242        );
1243    }
1244
1245    #[gpui::test]
1246    async fn test_empty_image() {
1247        let parsed = parse("![]()").await;
1248
1249        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1250            text
1251        } else {
1252            panic!("Expected a paragraph");
1253        };
1254        assert_eq!(paragraph.len(), 0);
1255    }
1256
1257    #[gpui::test]
1258    async fn test_image_links_detection() {
1259        let parsed = parse("![test](https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png)").await;
1260
1261        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1262            text
1263        } else {
1264            panic!("Expected a paragraph");
1265        };
1266        assert_eq!(
1267            paragraph[0],
1268            MarkdownParagraphChunk::Image(Image {
1269                source_range: 0..111,
1270                link: Link::Web {
1271                    url: "https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png".to_string(),
1272                },
1273                alt_text: Some("test".into()),
1274                height: None,
1275                width: None,
1276            },)
1277        );
1278    }
1279
1280    #[gpui::test]
1281    async fn test_image_without_alt_text() {
1282        let parsed = parse("![](http://example.com/foo.png)").await;
1283
1284        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1285            text
1286        } else {
1287            panic!("Expected a paragraph");
1288        };
1289        assert_eq!(
1290            paragraph[0],
1291            MarkdownParagraphChunk::Image(Image {
1292                source_range: 0..31,
1293                link: Link::Web {
1294                    url: "http://example.com/foo.png".to_string(),
1295                },
1296                alt_text: None,
1297                height: None,
1298                width: None,
1299            },)
1300        );
1301    }
1302
1303    #[gpui::test]
1304    async fn test_image_with_alt_text_containing_formatting() {
1305        let parsed = parse("![foo *bar* baz](http://example.com/foo.png)").await;
1306
1307        let ParsedMarkdownElement::Paragraph(chunks) = &parsed.children[0] else {
1308            panic!("Expected a paragraph");
1309        };
1310        assert_eq!(
1311            chunks,
1312            &[MarkdownParagraphChunk::Image(Image {
1313                source_range: 0..44,
1314                link: Link::Web {
1315                    url: "http://example.com/foo.png".to_string(),
1316                },
1317                alt_text: Some("foo bar baz".into()),
1318                height: None,
1319                width: None,
1320            }),],
1321        );
1322    }
1323
1324    #[gpui::test]
1325    async fn test_images_with_text_in_between() {
1326        let parsed = parse(
1327            "![foo](http://example.com/foo.png)\nLorem Ipsum\n![bar](http://example.com/bar.png)",
1328        )
1329        .await;
1330
1331        let chunks = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1332            text
1333        } else {
1334            panic!("Expected a paragraph");
1335        };
1336        assert_eq!(
1337            chunks,
1338            &vec![
1339                MarkdownParagraphChunk::Image(Image {
1340                    source_range: 0..81,
1341                    link: Link::Web {
1342                        url: "http://example.com/foo.png".to_string(),
1343                    },
1344                    alt_text: Some("foo".into()),
1345                    height: None,
1346                    width: None,
1347                }),
1348                MarkdownParagraphChunk::Text(ParsedMarkdownText {
1349                    source_range: 0..81,
1350                    contents: " Lorem Ipsum ".to_string(),
1351                    highlights: Vec::new(),
1352                    region_ranges: Vec::new(),
1353                    regions: Vec::new(),
1354                }),
1355                MarkdownParagraphChunk::Image(Image {
1356                    source_range: 0..81,
1357                    link: Link::Web {
1358                        url: "http://example.com/bar.png".to_string(),
1359                    },
1360                    alt_text: Some("bar".into()),
1361                    height: None,
1362                    width: None,
1363                })
1364            ]
1365        );
1366    }
1367
1368    #[test]
1369    fn test_parse_length() {
1370        // Test percentage values
1371        assert_eq!(
1372            MarkdownParser::parse_length("50%"),
1373            Some(DefiniteLength::Fraction(0.5))
1374        );
1375        assert_eq!(
1376            MarkdownParser::parse_length("100%"),
1377            Some(DefiniteLength::Fraction(1.0))
1378        );
1379        assert_eq!(
1380            MarkdownParser::parse_length("25%"),
1381            Some(DefiniteLength::Fraction(0.25))
1382        );
1383        assert_eq!(
1384            MarkdownParser::parse_length("0%"),
1385            Some(DefiniteLength::Fraction(0.0))
1386        );
1387
1388        // Test pixel values
1389        assert_eq!(
1390            MarkdownParser::parse_length("100px"),
1391            Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.0))))
1392        );
1393        assert_eq!(
1394            MarkdownParser::parse_length("50px"),
1395            Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(50.0))))
1396        );
1397        assert_eq!(
1398            MarkdownParser::parse_length("0px"),
1399            Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(0.0))))
1400        );
1401
1402        // Test values without units (should be treated as pixels)
1403        assert_eq!(
1404            MarkdownParser::parse_length("100"),
1405            Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.0))))
1406        );
1407        assert_eq!(
1408            MarkdownParser::parse_length("42"),
1409            Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(42.0))))
1410        );
1411
1412        // Test invalid values
1413        assert_eq!(MarkdownParser::parse_length("invalid"), None);
1414        assert_eq!(MarkdownParser::parse_length("px"), None);
1415        assert_eq!(MarkdownParser::parse_length("%"), None);
1416        assert_eq!(MarkdownParser::parse_length(""), None);
1417        assert_eq!(MarkdownParser::parse_length("abc%"), None);
1418        assert_eq!(MarkdownParser::parse_length("abcpx"), None);
1419
1420        // Test decimal values
1421        assert_eq!(
1422            MarkdownParser::parse_length("50.5%"),
1423            Some(DefiniteLength::Fraction(0.505))
1424        );
1425        assert_eq!(
1426            MarkdownParser::parse_length("100.25px"),
1427            Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.25))))
1428        );
1429        assert_eq!(
1430            MarkdownParser::parse_length("42.0"),
1431            Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(42.0))))
1432        );
1433    }
1434
1435    #[gpui::test]
1436    async fn test_html_block_quote() {
1437        let parsed = parse(
1438            "<blockquote>
1439              <p>some description</p>
1440            </blockquote>",
1441        )
1442        .await;
1443
1444        assert_eq!(
1445            ParsedMarkdown {
1446                children: vec![block_quote(
1447                    vec![ParsedMarkdownElement::Paragraph(text(
1448                        "some description",
1449                        0..76
1450                    ))],
1451                    0..76,
1452                )]
1453            },
1454            parsed
1455        );
1456    }
1457
1458    #[gpui::test]
1459    async fn test_html_nested_block_quote() {
1460        let parsed = parse(
1461            "<blockquote>
1462              <p>some description</p>
1463              <blockquote>
1464                <p>second description</p>
1465              </blockquote>
1466            </blockquote>",
1467        )
1468        .await;
1469
1470        assert_eq!(
1471            ParsedMarkdown {
1472                children: vec![block_quote(
1473                    vec![
1474                        ParsedMarkdownElement::Paragraph(text("some description", 0..173)),
1475                        block_quote(
1476                            vec![ParsedMarkdownElement::Paragraph(text(
1477                                "second description",
1478                                0..173
1479                            ))],
1480                            0..173,
1481                        )
1482                    ],
1483                    0..173,
1484                )]
1485            },
1486            parsed
1487        );
1488    }
1489
1490    #[gpui::test]
1491    async fn test_html_table() {
1492        let parsed = parse(
1493            "<table>
1494          <thead>
1495            <tr>
1496              <th>Id</th>
1497              <th>Name</th>
1498            </tr>
1499          </thead>
1500          <tbody>
1501            <tr>
1502              <td>1</td>
1503              <td>Chris</td>
1504            </tr>
1505            <tr>
1506              <td>2</td>
1507              <td>Dennis</td>
1508            </tr>
1509          </tbody>
1510        </table>",
1511        )
1512        .await;
1513
1514        assert_eq!(
1515            ParsedMarkdown {
1516                children: vec![ParsedMarkdownElement::Table(table(
1517                    0..366,
1518                    row(vec![text("Id", 0..366), text("Name ", 0..366)]),
1519                    vec![
1520                        row(vec![text("1", 0..366), text("Chris", 0..366)]),
1521                        row(vec![text("2", 0..366), text("Dennis", 0..366)]),
1522                    ],
1523                ))],
1524            },
1525            parsed
1526        );
1527    }
1528
1529    #[gpui::test]
1530    async fn test_html_table_without_headings() {
1531        let parsed = parse(
1532            "<table>
1533          <tbody>
1534            <tr>
1535              <td>1</td>
1536              <td>Chris</td>
1537            </tr>
1538            <tr>
1539              <td>2</td>
1540              <td>Dennis</td>
1541            </tr>
1542          </tbody>
1543        </table>",
1544        )
1545        .await;
1546
1547        assert_eq!(
1548            ParsedMarkdown {
1549                children: vec![ParsedMarkdownElement::Table(table(
1550                    0..240,
1551                    row(vec![]),
1552                    vec![
1553                        row(vec![text("1", 0..240), text("Chris", 0..240)]),
1554                        row(vec![text("2", 0..240), text("Dennis", 0..240)]),
1555                    ],
1556                ))],
1557            },
1558            parsed
1559        );
1560    }
1561
1562    #[gpui::test]
1563    async fn test_html_table_without_body() {
1564        let parsed = parse(
1565            "<table>
1566          <thead>
1567            <tr>
1568              <th>Id</th>
1569              <th>Name</th>
1570            </tr>
1571          </thead>
1572        </table>",
1573        )
1574        .await;
1575
1576        assert_eq!(
1577            ParsedMarkdown {
1578                children: vec![ParsedMarkdownElement::Table(table(
1579                    0..150,
1580                    row(vec![text("Id", 0..150), text("Name", 0..150)]),
1581                    vec![],
1582                ))],
1583            },
1584            parsed
1585        );
1586    }
1587
1588    #[gpui::test]
1589    async fn test_html_heading_tags() {
1590        let parsed = parse("<h1>Heading</h1><h2>Heading</h2><h3>Heading</h3><h4>Heading</h4><h5>Heading</h5><h6>Heading</h6>").await;
1591
1592        assert_eq!(
1593            ParsedMarkdown {
1594                children: vec![
1595                    ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1596                        level: HeadingLevel::H1,
1597                        source_range: 0..96,
1598                        contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1599                            source_range: 0..96,
1600                            contents: "Heading".into(),
1601                            highlights: Vec::default(),
1602                            region_ranges: Vec::default(),
1603                            regions: Vec::default()
1604                        })],
1605                    }),
1606                    ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1607                        level: HeadingLevel::H2,
1608                        source_range: 0..96,
1609                        contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1610                            source_range: 0..96,
1611                            contents: "Heading".into(),
1612                            highlights: Vec::default(),
1613                            region_ranges: Vec::default(),
1614                            regions: Vec::default()
1615                        })],
1616                    }),
1617                    ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1618                        level: HeadingLevel::H3,
1619                        source_range: 0..96,
1620                        contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1621                            source_range: 0..96,
1622                            contents: "Heading".into(),
1623                            highlights: Vec::default(),
1624                            region_ranges: Vec::default(),
1625                            regions: Vec::default()
1626                        })],
1627                    }),
1628                    ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1629                        level: HeadingLevel::H4,
1630                        source_range: 0..96,
1631                        contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1632                            source_range: 0..96,
1633                            contents: "Heading".into(),
1634                            highlights: Vec::default(),
1635                            region_ranges: Vec::default(),
1636                            regions: Vec::default()
1637                        })],
1638                    }),
1639                    ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1640                        level: HeadingLevel::H5,
1641                        source_range: 0..96,
1642                        contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1643                            source_range: 0..96,
1644                            contents: "Heading".into(),
1645                            highlights: Vec::default(),
1646                            region_ranges: Vec::default(),
1647                            regions: Vec::default()
1648                        })],
1649                    }),
1650                    ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1651                        level: HeadingLevel::H6,
1652                        source_range: 0..96,
1653                        contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1654                            source_range: 0..96,
1655                            contents: "Heading".into(),
1656                            highlights: Vec::default(),
1657                            region_ranges: Vec::default(),
1658                            regions: Vec::default()
1659                        })],
1660                    }),
1661                ],
1662            },
1663            parsed
1664        );
1665    }
1666
1667    #[gpui::test]
1668    async fn test_html_image_tag() {
1669        let parsed = parse("<img src=\"http://example.com/foo.png\" />").await;
1670
1671        let ParsedMarkdownElement::Image(image) = &parsed.children[0] else {
1672            panic!("Expected a image element");
1673        };
1674        assert_eq!(
1675            image.clone(),
1676            Image {
1677                source_range: 0..40,
1678                link: Link::Web {
1679                    url: "http://example.com/foo.png".to_string(),
1680                },
1681                alt_text: None,
1682                height: None,
1683                width: None,
1684            },
1685        );
1686    }
1687
1688    #[gpui::test]
1689    async fn test_html_image_tag_with_alt_text() {
1690        let parsed = parse("<img src=\"http://example.com/foo.png\" alt=\"Foo\" />").await;
1691
1692        let ParsedMarkdownElement::Image(image) = &parsed.children[0] else {
1693            panic!("Expected a image element");
1694        };
1695        assert_eq!(
1696            image.clone(),
1697            Image {
1698                source_range: 0..50,
1699                link: Link::Web {
1700                    url: "http://example.com/foo.png".to_string(),
1701                },
1702                alt_text: Some("Foo".into()),
1703                height: None,
1704                width: None,
1705            },
1706        );
1707    }
1708
1709    #[gpui::test]
1710    async fn test_html_image_tag_with_height_and_width() {
1711        let parsed =
1712            parse("<img src=\"http://example.com/foo.png\" height=\"100\" width=\"200\" />").await;
1713
1714        let ParsedMarkdownElement::Image(image) = &parsed.children[0] else {
1715            panic!("Expected a image element");
1716        };
1717        assert_eq!(
1718            image.clone(),
1719            Image {
1720                source_range: 0..65,
1721                link: Link::Web {
1722                    url: "http://example.com/foo.png".to_string(),
1723                },
1724                alt_text: None,
1725                height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
1726                width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
1727            },
1728        );
1729    }
1730
1731    #[gpui::test]
1732    async fn test_html_image_style_tag_with_height_and_width() {
1733        let parsed = parse(
1734            "<img src=\"http://example.com/foo.png\" style=\"height:100px; width:200px;\" />",
1735        )
1736        .await;
1737
1738        let ParsedMarkdownElement::Image(image) = &parsed.children[0] else {
1739            panic!("Expected a image element");
1740        };
1741        assert_eq!(
1742            image.clone(),
1743            Image {
1744                source_range: 0..75,
1745                link: Link::Web {
1746                    url: "http://example.com/foo.png".to_string(),
1747                },
1748                alt_text: None,
1749                height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
1750                width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
1751            },
1752        );
1753    }
1754
1755    #[gpui::test]
1756    async fn test_header_only_table() {
1757        let markdown = "\
1758| Header 1 | Header 2 |
1759|----------|----------|
1760
1761Some other content
1762";
1763
1764        let expected_table = table(
1765            0..48,
1766            row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1767            vec![],
1768        );
1769
1770        assert_eq!(
1771            parse(markdown).await.children[0],
1772            ParsedMarkdownElement::Table(expected_table)
1773        );
1774    }
1775
1776    #[gpui::test]
1777    async fn test_basic_table() {
1778        let markdown = "\
1779| Header 1 | Header 2 |
1780|----------|----------|
1781| Cell 1   | Cell 2   |
1782| Cell 3   | Cell 4   |";
1783
1784        let expected_table = table(
1785            0..95,
1786            row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1787            vec![
1788                row(vec![text("Cell 1", 49..59), text("Cell 2", 60..70)]),
1789                row(vec![text("Cell 3", 73..83), text("Cell 4", 84..94)]),
1790            ],
1791        );
1792
1793        assert_eq!(
1794            parse(markdown).await.children[0],
1795            ParsedMarkdownElement::Table(expected_table)
1796        );
1797    }
1798
1799    #[gpui::test]
1800    async fn test_list_basic() {
1801        let parsed = parse(
1802            "\
1803* Item 1
1804* Item 2
1805* Item 3
1806",
1807        )
1808        .await;
1809
1810        assert_eq!(
1811            parsed.children,
1812            vec![
1813                list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1814                list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1815                list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
1816            ],
1817        );
1818    }
1819
1820    #[gpui::test]
1821    async fn test_list_with_tasks() {
1822        let parsed = parse(
1823            "\
1824- [ ] TODO
1825- [x] Checked
1826",
1827        )
1828        .await;
1829
1830        assert_eq!(
1831            parsed.children,
1832            vec![
1833                list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1834                list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
1835            ],
1836        );
1837    }
1838
1839    #[gpui::test]
1840    async fn test_list_with_indented_task() {
1841        let parsed = parse(
1842            "\
1843- [ ] TODO
1844  - [x] Checked
1845  - Unordered
1846  1. Number 1
1847  1. Number 2
18481. Number A
1849",
1850        )
1851        .await;
1852
1853        assert_eq!(
1854            parsed.children,
1855            vec![
1856                list_item(0..12, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1857                list_item(13..26, 2, Task(true, 15..18), vec![p("Checked", 19..26)]),
1858                list_item(29..40, 2, Unordered, vec![p("Unordered", 31..40)]),
1859                list_item(43..54, 2, Ordered(1), vec![p("Number 1", 46..54)]),
1860                list_item(57..68, 2, Ordered(2), vec![p("Number 2", 60..68)]),
1861                list_item(69..80, 1, Ordered(1), vec![p("Number A", 72..80)]),
1862            ],
1863        );
1864    }
1865
1866    #[gpui::test]
1867    async fn test_list_with_linebreak_is_handled_correctly() {
1868        let parsed = parse(
1869            "\
1870- [ ] Task 1
1871
1872- [x] Task 2
1873",
1874        )
1875        .await;
1876
1877        assert_eq!(
1878            parsed.children,
1879            vec![
1880                list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
1881                list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
1882            ],
1883        );
1884    }
1885
1886    #[gpui::test]
1887    async fn test_list_nested() {
1888        let parsed = parse(
1889            "\
1890* Item 1
1891* Item 2
1892* Item 3
1893
18941. Hello
18951. Two
1896   1. Three
18972. Four
18983. Five
1899
1900* First
1901  1. Hello
1902     1. Goodbyte
1903        - Inner
1904        - Inner
1905  2. Goodbyte
1906        - Next item empty
1907        -
1908* Last
1909",
1910        )
1911        .await;
1912
1913        assert_eq!(
1914            parsed.children,
1915            vec![
1916                list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1917                list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1918                list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
1919                list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
1920                list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
1921                list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
1922                list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
1923                list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
1924                list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
1925                list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
1926                list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
1927                list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
1928                list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
1929                list_item(143..159, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
1930                list_item(160..180, 3, Unordered, vec![p("Next item empty", 165..180)]),
1931                list_item(186..190, 3, Unordered, vec![]),
1932                list_item(191..197, 1, Unordered, vec![p("Last", 193..197)]),
1933            ]
1934        );
1935    }
1936
1937    #[gpui::test]
1938    async fn test_list_with_nested_content() {
1939        let parsed = parse(
1940            "\
1941*   This is a list item with two paragraphs.
1942
1943    This is the second paragraph in the list item.
1944",
1945        )
1946        .await;
1947
1948        assert_eq!(
1949            parsed.children,
1950            vec![list_item(
1951                0..96,
1952                1,
1953                Unordered,
1954                vec![
1955                    p("This is a list item with two paragraphs.", 4..44),
1956                    p("This is the second paragraph in the list item.", 50..97)
1957                ],
1958            ),],
1959        );
1960    }
1961
1962    #[gpui::test]
1963    async fn test_list_item_with_inline_html() {
1964        let parsed = parse(
1965            "\
1966*   This is a list item with an inline HTML <sometag>tag</sometag>.
1967",
1968        )
1969        .await;
1970
1971        assert_eq!(
1972            parsed.children,
1973            vec![list_item(
1974                0..67,
1975                1,
1976                Unordered,
1977                vec![p("This is a list item with an inline HTML tag.", 4..44),],
1978            ),],
1979        );
1980    }
1981
1982    #[gpui::test]
1983    async fn test_nested_list_with_paragraph_inside() {
1984        let parsed = parse(
1985            "\
19861. a
1987    1. b
1988        1. c
1989
1990    text
1991
1992    1. d
1993",
1994        )
1995        .await;
1996
1997        assert_eq!(
1998            parsed.children,
1999            vec![
2000                list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
2001                list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
2002                list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
2003                p("text", 32..37),
2004                list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
2005            ],
2006        );
2007    }
2008
2009    #[gpui::test]
2010    async fn test_list_with_leading_text() {
2011        let parsed = parse(
2012            "\
2013* `code`
2014* **bold**
2015* [link](https://example.com)
2016",
2017        )
2018        .await;
2019
2020        assert_eq!(
2021            parsed.children,
2022            vec![
2023                list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
2024                list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
2025                list_item(20..49, 1, Unordered, vec![p("link", 22..49)],),
2026            ],
2027        );
2028    }
2029
2030    #[gpui::test]
2031    async fn test_simple_block_quote() {
2032        let parsed = parse("> Simple block quote with **styled text**").await;
2033
2034        assert_eq!(
2035            parsed.children,
2036            vec![block_quote(
2037                vec![p("Simple block quote with styled text", 2..41)],
2038                0..41
2039            )]
2040        );
2041    }
2042
2043    #[gpui::test]
2044    async fn test_simple_block_quote_with_multiple_lines() {
2045        let parsed = parse(
2046            "\
2047> # Heading
2048> More
2049> text
2050>
2051> More text
2052",
2053        )
2054        .await;
2055
2056        assert_eq!(
2057            parsed.children,
2058            vec![block_quote(
2059                vec![
2060                    h1(text("Heading", 4..11), 2..12),
2061                    p("More text", 14..26),
2062                    p("More text", 30..40)
2063                ],
2064                0..40
2065            )]
2066        );
2067    }
2068
2069    #[gpui::test]
2070    async fn test_nested_block_quote() {
2071        let parsed = parse(
2072            "\
2073> A
2074>
2075> > # B
2076>
2077> C
2078
2079More text
2080",
2081        )
2082        .await;
2083
2084        assert_eq!(
2085            parsed.children,
2086            vec![
2087                block_quote(
2088                    vec![
2089                        p("A", 2..4),
2090                        block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
2091                        p("C", 18..20)
2092                    ],
2093                    0..20
2094                ),
2095                p("More text", 21..31)
2096            ]
2097        );
2098    }
2099
2100    #[gpui::test]
2101    async fn test_code_block() {
2102        let parsed = parse(
2103            "\
2104```
2105fn main() {
2106    return 0;
2107}
2108```
2109",
2110        )
2111        .await;
2112
2113        assert_eq!(
2114            parsed.children,
2115            vec![code_block(
2116                None,
2117                "fn main() {\n    return 0;\n}",
2118                0..35,
2119                None
2120            )]
2121        );
2122    }
2123
2124    #[gpui::test]
2125    async fn test_code_block_with_language(executor: BackgroundExecutor) {
2126        let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
2127        language_registry.add(rust_lang());
2128
2129        let parsed = parse_markdown(
2130            "\
2131```rust
2132fn main() {
2133    return 0;
2134}
2135```
2136",
2137            None,
2138            Some(language_registry),
2139        )
2140        .await;
2141
2142        assert_eq!(
2143            parsed.children,
2144            vec![code_block(
2145                Some("rust".to_string()),
2146                "fn main() {\n    return 0;\n}",
2147                0..39,
2148                Some(vec![])
2149            )]
2150        );
2151    }
2152
2153    fn rust_lang() -> Arc<Language> {
2154        Arc::new(Language::new(
2155            LanguageConfig {
2156                name: "Rust".into(),
2157                matcher: LanguageMatcher {
2158                    path_suffixes: vec!["rs".into()],
2159                    ..Default::default()
2160                },
2161                collapsed_placeholder: " /* ... */ ".to_string(),
2162                ..Default::default()
2163            },
2164            Some(tree_sitter_rust::LANGUAGE.into()),
2165        ))
2166    }
2167
2168    fn h1(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
2169        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2170            source_range,
2171            level: HeadingLevel::H1,
2172            contents,
2173        })
2174    }
2175
2176    fn h2(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
2177        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2178            source_range,
2179            level: HeadingLevel::H2,
2180            contents,
2181        })
2182    }
2183
2184    fn h3(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
2185        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2186            source_range,
2187            level: HeadingLevel::H3,
2188            contents,
2189        })
2190    }
2191
2192    fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
2193        ParsedMarkdownElement::Paragraph(text(contents, source_range))
2194    }
2195
2196    fn text(contents: &str, source_range: Range<usize>) -> MarkdownParagraph {
2197        vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2198            highlights: Vec::new(),
2199            region_ranges: Vec::new(),
2200            regions: Vec::new(),
2201            source_range,
2202            contents: contents.to_string(),
2203        })]
2204    }
2205
2206    fn block_quote(
2207        children: Vec<ParsedMarkdownElement>,
2208        source_range: Range<usize>,
2209    ) -> ParsedMarkdownElement {
2210        ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
2211            source_range,
2212            children,
2213        })
2214    }
2215
2216    fn code_block(
2217        language: Option<String>,
2218        code: &str,
2219        source_range: Range<usize>,
2220        highlights: Option<Vec<(Range<usize>, HighlightId)>>,
2221    ) -> ParsedMarkdownElement {
2222        ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
2223            source_range,
2224            language,
2225            contents: code.to_string().into(),
2226            highlights,
2227        })
2228    }
2229
2230    fn list_item(
2231        source_range: Range<usize>,
2232        depth: u16,
2233        item_type: ParsedMarkdownListItemType,
2234        content: Vec<ParsedMarkdownElement>,
2235    ) -> ParsedMarkdownElement {
2236        ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
2237            source_range,
2238            item_type,
2239            depth,
2240            content,
2241        })
2242    }
2243
2244    fn table(
2245        source_range: Range<usize>,
2246        header: ParsedMarkdownTableRow,
2247        body: Vec<ParsedMarkdownTableRow>,
2248    ) -> ParsedMarkdownTable {
2249        ParsedMarkdownTable {
2250            column_alignments: Vec::new(),
2251            source_range,
2252            header,
2253            body,
2254        }
2255    }
2256
2257    fn row(children: Vec<MarkdownParagraph>) -> ParsedMarkdownTableRow {
2258        ParsedMarkdownTableRow { children }
2259    }
2260
2261    impl PartialEq for ParsedMarkdownTable {
2262        fn eq(&self, other: &Self) -> bool {
2263            self.source_range == other.source_range
2264                && self.header == other.header
2265                && self.body == other.body
2266        }
2267    }
2268
2269    impl PartialEq for ParsedMarkdownText {
2270        fn eq(&self, other: &Self) -> bool {
2271            self.source_range == other.source_range && self.contents == other.contents
2272        }
2273    }
2274}