markdown_parser.rs

   1use crate::{
   2    markdown_elements::*,
   3    markdown_minifier::{Minifier, MinifierOptions},
   4};
   5use async_recursion::async_recursion;
   6use collections::FxHashMap;
   7use gpui::{DefiniteLength, FontWeight, px, relative};
   8use html5ever::{ParseOpts, local_name, parse_document, tendril::TendrilSink};
   9use language::LanguageRegistry;
  10use markup5ever_rcdom::RcDom;
  11use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
  12use std::{cell::RefCell, collections::HashMap, ops::Range, path::PathBuf, rc::Rc, sync::Arc, vec};
  13
  14pub async fn parse_markdown(
  15    markdown_input: &str,
  16    file_location_directory: Option<PathBuf>,
  17    language_registry: Option<Arc<LanguageRegistry>>,
  18) -> ParsedMarkdown {
  19    let mut options = Options::all();
  20    options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
  21
  22    let parser = Parser::new_ext(markdown_input, options);
  23    let parser = MarkdownParser::new(
  24        parser.into_offset_iter().collect(),
  25        file_location_directory,
  26        language_registry,
  27    );
  28    let renderer = parser.parse_document().await;
  29    ParsedMarkdown {
  30        children: renderer.parsed,
  31    }
  32}
  33
  34fn cleanup_html(source: &str) -> Vec<u8> {
  35    let mut writer = std::io::Cursor::new(Vec::new());
  36    let mut reader = std::io::Cursor::new(source);
  37    let mut minify = Minifier::new(
  38        &mut writer,
  39        MinifierOptions {
  40            omit_doctype: true,
  41            collapse_whitespace: true,
  42            ..Default::default()
  43        },
  44    );
  45    if let Ok(()) = minify.minify(&mut reader) {
  46        writer.into_inner()
  47    } else {
  48        source.bytes().collect()
  49    }
  50}
  51
  52struct MarkdownParser<'a> {
  53    tokens: Vec<(Event<'a>, Range<usize>)>,
  54    /// The current index in the tokens array
  55    cursor: usize,
  56    /// The blocks that we have successfully parsed so far
  57    parsed: Vec<ParsedMarkdownElement>,
  58    file_location_directory: Option<PathBuf>,
  59    language_registry: Option<Arc<LanguageRegistry>>,
  60}
  61
  62struct MarkdownListItem {
  63    content: Vec<ParsedMarkdownElement>,
  64    item_type: ParsedMarkdownListItemType,
  65}
  66
  67impl Default for MarkdownListItem {
  68    fn default() -> Self {
  69        Self {
  70            content: Vec::new(),
  71            item_type: ParsedMarkdownListItemType::Unordered,
  72        }
  73    }
  74}
  75
  76impl<'a> MarkdownParser<'a> {
  77    fn new(
  78        tokens: Vec<(Event<'a>, Range<usize>)>,
  79        file_location_directory: Option<PathBuf>,
  80        language_registry: Option<Arc<LanguageRegistry>>,
  81    ) -> Self {
  82        Self {
  83            tokens,
  84            file_location_directory,
  85            language_registry,
  86            cursor: 0,
  87            parsed: vec![],
  88        }
  89    }
  90
  91    fn eof(&self) -> bool {
  92        if self.tokens.is_empty() {
  93            return true;
  94        }
  95        self.cursor >= self.tokens.len() - 1
  96    }
  97
  98    fn peek(&self, steps: usize) -> Option<&(Event<'_>, Range<usize>)> {
  99        if self.eof() || (steps + self.cursor) >= self.tokens.len() {
 100            return self.tokens.last();
 101        }
 102        self.tokens.get(self.cursor + steps)
 103    }
 104
 105    fn previous(&self) -> Option<&(Event<'_>, Range<usize>)> {
 106        if self.cursor == 0 || self.cursor > self.tokens.len() {
 107            return None;
 108        }
 109        self.tokens.get(self.cursor - 1)
 110    }
 111
 112    fn current(&self) -> Option<&(Event<'_>, Range<usize>)> {
 113        self.peek(0)
 114    }
 115
 116    fn current_event(&self) -> Option<&Event<'_>> {
 117        self.current().map(|(event, _)| event)
 118    }
 119
 120    fn is_text_like(event: &Event) -> bool {
 121        match event {
 122            Event::Text(_)
 123            // Represent an inline code block
 124            | Event::Code(_)
 125            | Event::Html(_)
 126            | Event::InlineHtml(_)
 127            | Event::FootnoteReference(_)
 128            | Event::Start(Tag::Link { .. })
 129            | Event::Start(Tag::Emphasis)
 130            | Event::Start(Tag::Strong)
 131            | Event::Start(Tag::Strikethrough)
 132            | Event::Start(Tag::Image { .. }) => {
 133                true
 134            }
 135            _ => false,
 136        }
 137    }
 138
 139    async fn parse_document(mut self) -> Self {
 140        while !self.eof() {
 141            if let Some(block) = self.parse_block().await {
 142                self.parsed.extend(block);
 143            } else {
 144                self.cursor += 1;
 145            }
 146        }
 147        self
 148    }
 149
 150    #[async_recursion]
 151    async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
 152        let (current, source_range) = self.current().unwrap();
 153        let source_range = source_range.clone();
 154        match current {
 155            Event::Start(tag) => match tag {
 156                Tag::Paragraph => {
 157                    self.cursor += 1;
 158                    let text = self.parse_text(false, Some(source_range));
 159                    Some(vec![ParsedMarkdownElement::Paragraph(text)])
 160                }
 161                Tag::Heading { level, .. } => {
 162                    let level = *level;
 163                    self.cursor += 1;
 164                    let heading = self.parse_heading(level);
 165                    Some(vec![ParsedMarkdownElement::Heading(heading)])
 166                }
 167                Tag::Table(alignment) => {
 168                    let alignment = alignment.clone();
 169                    self.cursor += 1;
 170                    let table = self.parse_table(alignment);
 171                    Some(vec![ParsedMarkdownElement::Table(table)])
 172                }
 173                Tag::List(order) => {
 174                    let order = *order;
 175                    self.cursor += 1;
 176                    let list = self.parse_list(order).await;
 177                    Some(list)
 178                }
 179                Tag::BlockQuote(_kind) => {
 180                    self.cursor += 1;
 181                    let block_quote = self.parse_block_quote().await;
 182                    Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
 183                }
 184                Tag::CodeBlock(kind) => {
 185                    let language = match kind {
 186                        pulldown_cmark::CodeBlockKind::Indented => None,
 187                        pulldown_cmark::CodeBlockKind::Fenced(language) => {
 188                            if language.is_empty() {
 189                                None
 190                            } else {
 191                                Some(language.to_string())
 192                            }
 193                        }
 194                    };
 195
 196                    self.cursor += 1;
 197
 198                    let code_block = self.parse_code_block(language).await?;
 199                    Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
 200                }
 201                Tag::HtmlBlock => {
 202                    self.cursor += 1;
 203
 204                    Some(self.parse_html_block().await)
 205                }
 206                _ => None,
 207            },
 208            Event::Rule => {
 209                self.cursor += 1;
 210                Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
 211            }
 212            _ => None,
 213        }
 214    }
 215
 216    fn parse_text(
 217        &mut self,
 218        should_complete_on_soft_break: bool,
 219        source_range: Option<Range<usize>>,
 220    ) -> MarkdownParagraph {
 221        let source_range = source_range.unwrap_or_else(|| {
 222            self.current()
 223                .map(|(_, range)| range.clone())
 224                .unwrap_or_default()
 225        });
 226
 227        let mut markdown_text_like = Vec::new();
 228        let mut text = String::new();
 229        let mut bold_depth = 0;
 230        let mut italic_depth = 0;
 231        let mut strikethrough_depth = 0;
 232        let mut link: Option<Link> = None;
 233        let mut image: Option<Image> = None;
 234        let mut region_ranges: Vec<Range<usize>> = vec![];
 235        let mut regions: Vec<ParsedRegion> = vec![];
 236        let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
 237        let mut link_urls: Vec<String> = vec![];
 238        let mut link_ranges: Vec<Range<usize>> = vec![];
 239
 240        loop {
 241            if self.eof() {
 242                break;
 243            }
 244
 245            let (current, _) = self.current().unwrap();
 246            let prev_len = text.len();
 247            match current {
 248                Event::SoftBreak => {
 249                    if should_complete_on_soft_break {
 250                        break;
 251                    }
 252                    text.push(' ');
 253                }
 254
 255                Event::HardBreak => {
 256                    text.push('\n');
 257                }
 258
 259                // We want to ignore any inline HTML tags in the text but keep
 260                // the text between them
 261                Event::InlineHtml(_) => {}
 262
 263                Event::Text(t) => {
 264                    text.push_str(t.as_ref());
 265                    let mut style = MarkdownHighlightStyle::default();
 266
 267                    if bold_depth > 0 {
 268                        style.weight = FontWeight::BOLD;
 269                    }
 270
 271                    if italic_depth > 0 {
 272                        style.italic = true;
 273                    }
 274
 275                    if strikethrough_depth > 0 {
 276                        style.strikethrough = true;
 277                    }
 278
 279                    let last_run_len = if let Some(link) = link.clone() {
 280                        region_ranges.push(prev_len..text.len());
 281                        regions.push(ParsedRegion {
 282                            code: false,
 283                            link: Some(link),
 284                        });
 285                        style.link = true;
 286                        prev_len
 287                    } else {
 288                        // Manually scan for links
 289                        let mut finder = linkify::LinkFinder::new();
 290                        finder.kinds(&[linkify::LinkKind::Url]);
 291                        let mut last_link_len = prev_len;
 292                        for link in finder.links(t) {
 293                            let start = link.start();
 294                            let end = link.end();
 295                            let range = (prev_len + start)..(prev_len + end);
 296                            link_ranges.push(range.clone());
 297                            link_urls.push(link.as_str().to_string());
 298
 299                            // If there is a style before we match a link, we have to add this to the highlighted ranges
 300                            if style != MarkdownHighlightStyle::default()
 301                                && last_link_len < link.start()
 302                            {
 303                                highlights.push((
 304                                    last_link_len..link.start(),
 305                                    MarkdownHighlight::Style(style.clone()),
 306                                ));
 307                            }
 308
 309                            highlights.push((
 310                                range.clone(),
 311                                MarkdownHighlight::Style(MarkdownHighlightStyle {
 312                                    underline: true,
 313                                    ..style
 314                                }),
 315                            ));
 316                            region_ranges.push(range.clone());
 317                            regions.push(ParsedRegion {
 318                                code: false,
 319                                link: Some(Link::Web {
 320                                    url: link.as_str().to_string(),
 321                                }),
 322                            });
 323                            last_link_len = end;
 324                        }
 325                        last_link_len
 326                    };
 327
 328                    if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
 329                        let mut new_highlight = true;
 330                        if let Some((last_range, last_style)) = highlights.last_mut()
 331                            && last_range.end == last_run_len
 332                            && last_style == &MarkdownHighlight::Style(style.clone())
 333                        {
 334                            last_range.end = text.len();
 335                            new_highlight = false;
 336                        }
 337                        if new_highlight {
 338                            highlights.push((
 339                                last_run_len..text.len(),
 340                                MarkdownHighlight::Style(style.clone()),
 341                            ));
 342                        }
 343                    }
 344                }
 345                Event::Code(t) => {
 346                    text.push_str(t.as_ref());
 347                    region_ranges.push(prev_len..text.len());
 348
 349                    if link.is_some() {
 350                        highlights.push((
 351                            prev_len..text.len(),
 352                            MarkdownHighlight::Style(MarkdownHighlightStyle {
 353                                link: true,
 354                                ..Default::default()
 355                            }),
 356                        ));
 357                    }
 358                    regions.push(ParsedRegion {
 359                        code: true,
 360                        link: link.clone(),
 361                    });
 362                }
 363                Event::Start(tag) => match tag {
 364                    Tag::Emphasis => italic_depth += 1,
 365                    Tag::Strong => bold_depth += 1,
 366                    Tag::Strikethrough => strikethrough_depth += 1,
 367                    Tag::Link { dest_url, .. } => {
 368                        link = Link::identify(
 369                            self.file_location_directory.clone(),
 370                            dest_url.to_string(),
 371                        );
 372                    }
 373                    Tag::Image { dest_url, .. } => {
 374                        if !text.is_empty() {
 375                            let parsed_regions = MarkdownParagraphChunk::Text(ParsedMarkdownText {
 376                                source_range: source_range.clone(),
 377                                contents: text.clone(),
 378                                highlights: highlights.clone(),
 379                                region_ranges: region_ranges.clone(),
 380                                regions: regions.clone(),
 381                            });
 382                            text = String::new();
 383                            highlights = vec![];
 384                            region_ranges = vec![];
 385                            regions = vec![];
 386                            markdown_text_like.push(parsed_regions);
 387                        }
 388                        image = Image::identify(
 389                            dest_url.to_string(),
 390                            source_range.clone(),
 391                            self.file_location_directory.clone(),
 392                        );
 393                    }
 394                    _ => {
 395                        break;
 396                    }
 397                },
 398
 399                Event::End(tag) => match tag {
 400                    TagEnd::Emphasis => italic_depth -= 1,
 401                    TagEnd::Strong => bold_depth -= 1,
 402                    TagEnd::Strikethrough => strikethrough_depth -= 1,
 403                    TagEnd::Link => {
 404                        link = None;
 405                    }
 406                    TagEnd::Image => {
 407                        if let Some(mut image) = image.take() {
 408                            if !text.is_empty() {
 409                                image.set_alt_text(std::mem::take(&mut text).into());
 410                            }
 411                            markdown_text_like.push(MarkdownParagraphChunk::Image(image));
 412                        }
 413                    }
 414                    TagEnd::Paragraph => {
 415                        self.cursor += 1;
 416                        break;
 417                    }
 418                    _ => {
 419                        break;
 420                    }
 421                },
 422                _ => {
 423                    break;
 424                }
 425            }
 426
 427            self.cursor += 1;
 428        }
 429        if !text.is_empty() {
 430            markdown_text_like.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
 431                source_range,
 432                contents: text,
 433                highlights,
 434                regions,
 435                region_ranges,
 436            }));
 437        }
 438        markdown_text_like
 439    }
 440
 441    fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
 442        let (_event, source_range) = self.previous().unwrap();
 443        let source_range = source_range.clone();
 444        let text = self.parse_text(true, None);
 445
 446        // Advance past the heading end tag
 447        self.cursor += 1;
 448
 449        ParsedMarkdownHeading {
 450            source_range,
 451            level: match level {
 452                pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
 453                pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
 454                pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
 455                pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
 456                pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
 457                pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
 458            },
 459            contents: text,
 460        }
 461    }
 462
 463    fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
 464        let (_event, source_range) = self.previous().unwrap();
 465        let source_range = source_range.clone();
 466        let mut header = ParsedMarkdownTableRow::new();
 467        let mut body = vec![];
 468        let mut current_row = vec![];
 469        let mut in_header = true;
 470        let column_alignments = alignment.iter().map(Self::convert_alignment).collect();
 471
 472        loop {
 473            if self.eof() {
 474                break;
 475            }
 476
 477            let (current, source_range) = self.current().unwrap();
 478            let source_range = source_range.clone();
 479            match current {
 480                Event::Start(Tag::TableHead)
 481                | Event::Start(Tag::TableRow)
 482                | Event::End(TagEnd::TableCell) => {
 483                    self.cursor += 1;
 484                }
 485                Event::Start(Tag::TableCell) => {
 486                    self.cursor += 1;
 487                    let cell_contents = self.parse_text(false, Some(source_range));
 488                    current_row.push(cell_contents);
 489                }
 490                Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
 491                    self.cursor += 1;
 492                    let new_row = std::mem::take(&mut current_row);
 493                    if in_header {
 494                        header.children = new_row;
 495                        in_header = false;
 496                    } else {
 497                        let row = ParsedMarkdownTableRow::with_children(new_row);
 498                        body.push(row);
 499                    }
 500                }
 501                Event::End(TagEnd::Table) => {
 502                    self.cursor += 1;
 503                    break;
 504                }
 505                _ => {
 506                    break;
 507                }
 508            }
 509        }
 510
 511        ParsedMarkdownTable {
 512            source_range,
 513            header,
 514            body,
 515            column_alignments,
 516        }
 517    }
 518
 519    fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
 520        match alignment {
 521            Alignment::None => ParsedMarkdownTableAlignment::None,
 522            Alignment::Left => ParsedMarkdownTableAlignment::Left,
 523            Alignment::Center => ParsedMarkdownTableAlignment::Center,
 524            Alignment::Right => ParsedMarkdownTableAlignment::Right,
 525        }
 526    }
 527
 528    async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
 529        let (_, list_source_range) = self.previous().unwrap();
 530
 531        let mut items = Vec::new();
 532        let mut items_stack = vec![MarkdownListItem::default()];
 533        let mut depth = 1;
 534        let mut order = order;
 535        let mut order_stack = Vec::new();
 536
 537        let mut insertion_indices = FxHashMap::default();
 538        let mut source_ranges = FxHashMap::default();
 539        let mut start_item_range = list_source_range.clone();
 540
 541        while !self.eof() {
 542            let (current, source_range) = self.current().unwrap();
 543            match current {
 544                Event::Start(Tag::List(new_order)) => {
 545                    if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
 546                        insertion_indices.insert(depth, items.len());
 547                    }
 548
 549                    // We will use the start of the nested list as the end for the current item's range,
 550                    // because we don't care about the hierarchy of list items
 551                    if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
 552                        e.insert(start_item_range.start..source_range.start);
 553                    }
 554
 555                    order_stack.push(order);
 556                    order = *new_order;
 557                    self.cursor += 1;
 558                    depth += 1;
 559                }
 560                Event::End(TagEnd::List(_)) => {
 561                    order = order_stack.pop().flatten();
 562                    self.cursor += 1;
 563                    depth -= 1;
 564
 565                    if depth == 0 {
 566                        break;
 567                    }
 568                }
 569                Event::Start(Tag::Item) => {
 570                    start_item_range = source_range.clone();
 571
 572                    self.cursor += 1;
 573                    items_stack.push(MarkdownListItem::default());
 574
 575                    let mut task_list = None;
 576                    // Check for task list marker (`- [ ]` or `- [x]`)
 577                    if let Some(event) = self.current_event() {
 578                        // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
 579                        if event == &Event::Start(Tag::Paragraph) {
 580                            self.cursor += 1;
 581                        }
 582
 583                        if let Some((Event::TaskListMarker(checked), range)) = self.current() {
 584                            task_list = Some((*checked, range.clone()));
 585                            self.cursor += 1;
 586                        }
 587                    }
 588
 589                    if let Some((event, range)) = self.current() {
 590                        // This is a plain list item.
 591                        // For example `- some text` or `1. [Docs](./docs.md)`
 592                        if MarkdownParser::is_text_like(event) {
 593                            let text = self.parse_text(false, Some(range.clone()));
 594                            let block = ParsedMarkdownElement::Paragraph(text);
 595                            if let Some(content) = items_stack.last_mut() {
 596                                let item_type = if let Some((checked, range)) = task_list {
 597                                    ParsedMarkdownListItemType::Task(checked, range)
 598                                } else if let Some(order) = order {
 599                                    ParsedMarkdownListItemType::Ordered(order)
 600                                } else {
 601                                    ParsedMarkdownListItemType::Unordered
 602                                };
 603                                content.item_type = item_type;
 604                                content.content.push(block);
 605                            }
 606                        } else {
 607                            let block = self.parse_block().await;
 608                            if let Some(block) = block
 609                                && let Some(list_item) = items_stack.last_mut()
 610                            {
 611                                list_item.content.extend(block);
 612                            }
 613                        }
 614                    }
 615
 616                    // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
 617                    if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
 618                        self.cursor += 1;
 619                    }
 620                }
 621                Event::End(TagEnd::Item) => {
 622                    self.cursor += 1;
 623
 624                    if let Some(current) = order {
 625                        order = Some(current + 1);
 626                    }
 627
 628                    if let Some(list_item) = items_stack.pop() {
 629                        let source_range = source_ranges
 630                            .remove(&depth)
 631                            .unwrap_or(start_item_range.clone());
 632
 633                        // We need to remove the last character of the source range, because it includes the newline character
 634                        let source_range = source_range.start..source_range.end - 1;
 635                        let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
 636                            source_range,
 637                            content: list_item.content,
 638                            depth,
 639                            item_type: list_item.item_type,
 640                        });
 641
 642                        if let Some(index) = insertion_indices.get(&depth) {
 643                            items.insert(*index, item);
 644                            insertion_indices.remove(&depth);
 645                        } else {
 646                            items.push(item);
 647                        }
 648                    }
 649                }
 650                _ => {
 651                    if depth == 0 {
 652                        break;
 653                    }
 654                    // This can only happen if a list item starts with more then one paragraph,
 655                    // or the list item contains blocks that should be rendered after the nested list items
 656                    let block = self.parse_block().await;
 657                    if let Some(block) = block {
 658                        if let Some(list_item) = items_stack.last_mut() {
 659                            // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
 660                            if !insertion_indices.contains_key(&depth) {
 661                                list_item.content.extend(block);
 662                                continue;
 663                            }
 664                        }
 665
 666                        // Otherwise we need to insert the block after all the nested items
 667                        // that have been parsed so far
 668                        items.extend(block);
 669                    } else {
 670                        self.cursor += 1;
 671                    }
 672                }
 673            }
 674        }
 675
 676        items
 677    }
 678
 679    #[async_recursion]
 680    async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
 681        let (_event, source_range) = self.previous().unwrap();
 682        let source_range = source_range.clone();
 683        let mut nested_depth = 1;
 684
 685        let mut children: Vec<ParsedMarkdownElement> = vec![];
 686
 687        while !self.eof() {
 688            let block = self.parse_block().await;
 689
 690            if let Some(block) = block {
 691                children.extend(block);
 692            } else {
 693                break;
 694            }
 695
 696            if self.eof() {
 697                break;
 698            }
 699
 700            let (current, _source_range) = self.current().unwrap();
 701            match current {
 702                // This is a nested block quote.
 703                // Record that we're in a nested block quote and continue parsing.
 704                // We don't need to advance the cursor since the next
 705                // call to `parse_block` will handle it.
 706                Event::Start(Tag::BlockQuote(_kind)) => {
 707                    nested_depth += 1;
 708                }
 709                Event::End(TagEnd::BlockQuote(_kind)) => {
 710                    nested_depth -= 1;
 711                    if nested_depth == 0 {
 712                        self.cursor += 1;
 713                        break;
 714                    }
 715                }
 716                _ => {}
 717            };
 718        }
 719
 720        ParsedMarkdownBlockQuote {
 721            source_range,
 722            children,
 723        }
 724    }
 725
 726    async fn parse_code_block(
 727        &mut self,
 728        language: Option<String>,
 729    ) -> Option<ParsedMarkdownCodeBlock> {
 730        let Some((_event, source_range)) = self.previous() else {
 731            return None;
 732        };
 733
 734        let source_range = source_range.clone();
 735        let mut code = String::new();
 736
 737        while !self.eof() {
 738            let Some((current, _source_range)) = self.current() else {
 739                break;
 740            };
 741
 742            match current {
 743                Event::Text(text) => {
 744                    code.push_str(text);
 745                    self.cursor += 1;
 746                }
 747                Event::End(TagEnd::CodeBlock) => {
 748                    self.cursor += 1;
 749                    break;
 750                }
 751                _ => {
 752                    break;
 753                }
 754            }
 755        }
 756
 757        code = code.strip_suffix('\n').unwrap_or(&code).to_string();
 758
 759        let highlights = if let Some(language) = &language {
 760            if let Some(registry) = &self.language_registry {
 761                let rope: language::Rope = code.as_str().into();
 762                registry
 763                    .language_for_name_or_extension(language)
 764                    .await
 765                    .map(|l| l.highlight_text(&rope, 0..code.len()))
 766                    .ok()
 767            } else {
 768                None
 769            }
 770        } else {
 771            None
 772        };
 773
 774        Some(ParsedMarkdownCodeBlock {
 775            source_range,
 776            contents: code.into(),
 777            language,
 778            highlights,
 779        })
 780    }
 781
 782    async fn parse_html_block(&mut self) -> Vec<ParsedMarkdownElement> {
 783        let mut elements = Vec::new();
 784        let Some((_event, _source_range)) = self.previous() else {
 785            return elements;
 786        };
 787
 788        let mut html_source_range_start = None;
 789        let mut html_source_range_end = None;
 790        let mut html_buffer = String::new();
 791
 792        while !self.eof() {
 793            let Some((current, source_range)) = self.current() else {
 794                break;
 795            };
 796            let source_range = source_range.clone();
 797            match current {
 798                Event::Html(html) => {
 799                    html_source_range_start.get_or_insert(source_range.start);
 800                    html_source_range_end = Some(source_range.end);
 801                    html_buffer.push_str(html);
 802                    self.cursor += 1;
 803                }
 804                Event::End(TagEnd::CodeBlock) => {
 805                    self.cursor += 1;
 806                    break;
 807                }
 808                _ => {
 809                    break;
 810                }
 811            }
 812        }
 813
 814        let bytes = cleanup_html(&html_buffer);
 815
 816        let mut cursor = std::io::Cursor::new(bytes);
 817        if let Ok(dom) = parse_document(RcDom::default(), ParseOpts::default())
 818            .from_utf8()
 819            .read_from(&mut cursor)
 820            && let Some((start, end)) = html_source_range_start.zip(html_source_range_end)
 821        {
 822            self.parse_html_node(start..end, &dom.document, &mut elements);
 823        }
 824
 825        elements
 826    }
 827
 828    fn parse_html_node(
 829        &self,
 830        source_range: Range<usize>,
 831        node: &Rc<markup5ever_rcdom::Node>,
 832        elements: &mut Vec<ParsedMarkdownElement>,
 833    ) {
 834        match &node.data {
 835            markup5ever_rcdom::NodeData::Document => {
 836                self.consume_children(source_range, node, elements);
 837            }
 838            markup5ever_rcdom::NodeData::Doctype { .. } => {}
 839            markup5ever_rcdom::NodeData::Text { contents } => {
 840                elements.push(ParsedMarkdownElement::Paragraph(vec![
 841                    MarkdownParagraphChunk::Text(ParsedMarkdownText {
 842                        source_range,
 843                        contents: contents.borrow().to_string(),
 844                        highlights: Vec::default(),
 845                        region_ranges: Vec::default(),
 846                        regions: Vec::default(),
 847                    }),
 848                ]));
 849            }
 850            markup5ever_rcdom::NodeData::Comment { .. } => {}
 851            markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
 852                if local_name!("img") == name.local {
 853                    if let Some(image) = self.extract_image(source_range, attrs) {
 854                        elements.push(ParsedMarkdownElement::Image(image));
 855                    }
 856                } else if matches!(
 857                    name.local,
 858                    local_name!("h1")
 859                        | local_name!("h2")
 860                        | local_name!("h3")
 861                        | local_name!("h4")
 862                        | local_name!("h5")
 863                        | local_name!("h6")
 864                ) {
 865                    let mut paragraph = MarkdownParagraph::new();
 866                    self.consume_paragraph(source_range.clone(), node, &mut paragraph);
 867
 868                    if !paragraph.is_empty() {
 869                        elements.push(ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
 870                            source_range,
 871                            level: match name.local {
 872                                local_name!("h1") => HeadingLevel::H1,
 873                                local_name!("h2") => HeadingLevel::H2,
 874                                local_name!("h3") => HeadingLevel::H3,
 875                                local_name!("h4") => HeadingLevel::H4,
 876                                local_name!("h5") => HeadingLevel::H5,
 877                                local_name!("h6") => HeadingLevel::H6,
 878                                _ => unreachable!(),
 879                            },
 880                            contents: paragraph,
 881                        }));
 882                    }
 883                } else if local_name!("table") == name.local {
 884                    if let Some(table) = self.extract_html_table(node, source_range) {
 885                        elements.push(ParsedMarkdownElement::Table(table));
 886                    }
 887                } else {
 888                    self.consume_children(source_range, node, elements);
 889                }
 890            }
 891            markup5ever_rcdom::NodeData::ProcessingInstruction { .. } => {}
 892        }
 893    }
 894
 895    fn parse_paragraph(
 896        &self,
 897        source_range: Range<usize>,
 898        node: &Rc<markup5ever_rcdom::Node>,
 899        paragraph: &mut MarkdownParagraph,
 900    ) {
 901        match &node.data {
 902            markup5ever_rcdom::NodeData::Text { contents } => {
 903                paragraph.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
 904                    source_range,
 905                    regions: Vec::default(),
 906                    contents: contents.borrow().to_string(),
 907                    region_ranges: Vec::default(),
 908                    highlights: Vec::default(),
 909                }));
 910            }
 911            markup5ever_rcdom::NodeData::Element { .. } => {
 912                self.consume_paragraph(source_range, node, paragraph);
 913            }
 914            _ => {}
 915        }
 916    }
 917
 918    fn consume_paragraph(
 919        &self,
 920        source_range: Range<usize>,
 921        node: &Rc<markup5ever_rcdom::Node>,
 922        paragraph: &mut MarkdownParagraph,
 923    ) {
 924        for node in node.children.borrow().iter() {
 925            self.parse_paragraph(source_range.clone(), node, paragraph);
 926        }
 927    }
 928
 929    fn consume_children(
 930        &self,
 931        source_range: Range<usize>,
 932        node: &Rc<markup5ever_rcdom::Node>,
 933        elements: &mut Vec<ParsedMarkdownElement>,
 934    ) {
 935        for node in node.children.borrow().iter() {
 936            self.parse_html_node(source_range.clone(), node, elements);
 937        }
 938    }
 939
 940    fn attr_value(
 941        attrs: &RefCell<Vec<html5ever::Attribute>>,
 942        name: html5ever::LocalName,
 943    ) -> Option<String> {
 944        attrs.borrow().iter().find_map(|attr| {
 945            if attr.name.local == name {
 946                Some(attr.value.to_string())
 947            } else {
 948                None
 949            }
 950        })
 951    }
 952
 953    fn extract_styles_from_attributes(
 954        attrs: &RefCell<Vec<html5ever::Attribute>>,
 955    ) -> HashMap<String, String> {
 956        let mut styles = HashMap::new();
 957
 958        if let Some(style) = Self::attr_value(attrs, local_name!("style")) {
 959            for decl in style.split(';') {
 960                let mut parts = decl.splitn(2, ':');
 961                if let Some((key, value)) = parts.next().zip(parts.next()) {
 962                    styles.insert(
 963                        key.trim().to_lowercase().to_string(),
 964                        value.trim().to_string(),
 965                    );
 966                }
 967            }
 968        }
 969
 970        styles
 971    }
 972
 973    fn extract_image(
 974        &self,
 975        source_range: Range<usize>,
 976        attrs: &RefCell<Vec<html5ever::Attribute>>,
 977    ) -> Option<Image> {
 978        let src = Self::attr_value(attrs, local_name!("src"))?;
 979
 980        let mut image = Image::identify(src, source_range, self.file_location_directory.clone())?;
 981
 982        if let Some(alt) = Self::attr_value(attrs, local_name!("alt")) {
 983            image.set_alt_text(alt.into());
 984        }
 985
 986        let styles = Self::extract_styles_from_attributes(attrs);
 987
 988        if let Some(width) = Self::attr_value(attrs, local_name!("width"))
 989            .or_else(|| styles.get("width").cloned())
 990            .and_then(|width| Self::parse_length(&width))
 991        {
 992            image.set_width(width);
 993        }
 994
 995        if let Some(height) = Self::attr_value(attrs, local_name!("height"))
 996            .or_else(|| styles.get("height").cloned())
 997            .and_then(|height| Self::parse_length(&height))
 998        {
 999            image.set_height(height);
1000        }
1001
1002        Some(image)
1003    }
1004
1005    fn extract_html_table(
1006        &self,
1007        node: &Rc<markup5ever_rcdom::Node>,
1008        source_range: Range<usize>,
1009    ) -> Option<ParsedMarkdownTable> {
1010        let mut header_columns = Vec::new();
1011        let mut body_rows = Vec::new();
1012
1013        // node should be a thead or tbody element
1014        for node in node.children.borrow().iter() {
1015            match &node.data {
1016                markup5ever_rcdom::NodeData::Element { name, .. } => {
1017                    if local_name!("thead") == name.local {
1018                        // node should be a tr element
1019                        for node in node.children.borrow().iter() {
1020                            let mut paragraph = MarkdownParagraph::new();
1021                            self.consume_paragraph(source_range.clone(), node, &mut paragraph);
1022
1023                            for paragraph in paragraph.into_iter() {
1024                                header_columns.push(vec![paragraph]);
1025                            }
1026                        }
1027                    } else if local_name!("tbody") == name.local {
1028                        // node should be a tr element
1029                        for node in node.children.borrow().iter() {
1030                            let mut row = MarkdownParagraph::new();
1031                            self.consume_paragraph(source_range.clone(), node, &mut row);
1032                            body_rows.push(ParsedMarkdownTableRow::with_children(
1033                                row.into_iter().map(|column| vec![column]).collect(),
1034                            ));
1035                        }
1036                    }
1037                }
1038                _ => {}
1039            }
1040        }
1041
1042        if !header_columns.is_empty() || !body_rows.is_empty() {
1043            Some(ParsedMarkdownTable {
1044                source_range,
1045                body: body_rows,
1046                column_alignments: Vec::default(),
1047                header: ParsedMarkdownTableRow::with_children(header_columns),
1048            })
1049        } else {
1050            None
1051        }
1052    }
1053
1054    /// Parses the width/height attribute value of an html element (e.g. img element)
1055    fn parse_length(value: &str) -> Option<DefiniteLength> {
1056        if value.ends_with("%") {
1057            value
1058                .trim_end_matches("%")
1059                .parse::<f32>()
1060                .ok()
1061                .map(|value| relative(value / 100.))
1062        } else {
1063            value
1064                .trim_end_matches("px")
1065                .parse()
1066                .ok()
1067                .map(|value| px(value).into())
1068        }
1069    }
1070}
1071
1072#[cfg(test)]
1073mod tests {
1074    use super::*;
1075    use ParsedMarkdownListItemType::*;
1076    use core::panic;
1077    use gpui::{AbsoluteLength, BackgroundExecutor, DefiniteLength};
1078    use language::{
1079        HighlightId, Language, LanguageConfig, LanguageMatcher, LanguageRegistry, tree_sitter_rust,
1080    };
1081    use pretty_assertions::assert_eq;
1082
1083    async fn parse(input: &str) -> ParsedMarkdown {
1084        parse_markdown(input, None, None).await
1085    }
1086
1087    #[gpui::test]
1088    async fn test_headings() {
1089        let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
1090
1091        assert_eq!(
1092            parsed.children,
1093            vec![
1094                h1(text("Heading one", 2..13), 0..14),
1095                h2(text("Heading two", 17..28), 14..29),
1096                h3(text("Heading three", 33..46), 29..46),
1097            ]
1098        );
1099    }
1100
1101    #[gpui::test]
1102    async fn test_newlines_dont_new_paragraphs() {
1103        let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
1104
1105        assert_eq!(
1106            parsed.children,
1107            vec![p("Some text that is bolded and italicized", 0..46)]
1108        );
1109    }
1110
1111    #[gpui::test]
1112    async fn test_heading_with_paragraph() {
1113        let parsed = parse("# Zed\nThe editor").await;
1114
1115        assert_eq!(
1116            parsed.children,
1117            vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
1118        );
1119    }
1120
1121    #[gpui::test]
1122    async fn test_double_newlines_do_new_paragraphs() {
1123        let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
1124
1125        assert_eq!(
1126            parsed.children,
1127            vec![
1128                p("Some text that is bolded", 0..29),
1129                p("and italicized", 31..47),
1130            ]
1131        );
1132    }
1133
1134    #[gpui::test]
1135    async fn test_bold_italic_text() {
1136        let parsed = parse("Some text **that is bolded** and *italicized*").await;
1137
1138        assert_eq!(
1139            parsed.children,
1140            vec![p("Some text that is bolded and italicized", 0..45)]
1141        );
1142    }
1143
1144    #[gpui::test]
1145    async fn test_nested_bold_strikethrough_text() {
1146        let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
1147
1148        assert_eq!(parsed.children.len(), 1);
1149        assert_eq!(
1150            parsed.children[0],
1151            ParsedMarkdownElement::Paragraph(vec![MarkdownParagraphChunk::Text(
1152                ParsedMarkdownText {
1153                    source_range: 0..35,
1154                    contents: "Some bostrikethroughld text".to_string(),
1155                    highlights: Vec::new(),
1156                    region_ranges: Vec::new(),
1157                    regions: Vec::new(),
1158                }
1159            )])
1160        );
1161
1162        let new_text = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1163            text
1164        } else {
1165            panic!("Expected a paragraph");
1166        };
1167
1168        let paragraph = if let MarkdownParagraphChunk::Text(text) = &new_text[0] {
1169            text
1170        } else {
1171            panic!("Expected a text");
1172        };
1173
1174        assert_eq!(
1175            paragraph.highlights,
1176            vec![
1177                (
1178                    5..7,
1179                    MarkdownHighlight::Style(MarkdownHighlightStyle {
1180                        weight: FontWeight::BOLD,
1181                        ..Default::default()
1182                    }),
1183                ),
1184                (
1185                    7..20,
1186                    MarkdownHighlight::Style(MarkdownHighlightStyle {
1187                        weight: FontWeight::BOLD,
1188                        strikethrough: true,
1189                        ..Default::default()
1190                    }),
1191                ),
1192                (
1193                    20..22,
1194                    MarkdownHighlight::Style(MarkdownHighlightStyle {
1195                        weight: FontWeight::BOLD,
1196                        ..Default::default()
1197                    }),
1198                ),
1199            ]
1200        );
1201    }
1202
1203    #[gpui::test]
1204    async fn test_text_with_inline_html() {
1205        let parsed = parse("This is a paragraph with an inline HTML <sometag>tag</sometag>.").await;
1206
1207        assert_eq!(
1208            parsed.children,
1209            vec![p("This is a paragraph with an inline HTML tag.", 0..63),],
1210        );
1211    }
1212
1213    #[gpui::test]
1214    async fn test_raw_links_detection() {
1215        let parsed = parse("Checkout this https://zed.dev link").await;
1216
1217        assert_eq!(
1218            parsed.children,
1219            vec![p("Checkout this https://zed.dev link", 0..34)]
1220        );
1221    }
1222
1223    #[gpui::test]
1224    async fn test_empty_image() {
1225        let parsed = parse("![]()").await;
1226
1227        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1228            text
1229        } else {
1230            panic!("Expected a paragraph");
1231        };
1232        assert_eq!(paragraph.len(), 0);
1233    }
1234
1235    #[gpui::test]
1236    async fn test_image_links_detection() {
1237        let parsed = parse("![test](https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png)").await;
1238
1239        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1240            text
1241        } else {
1242            panic!("Expected a paragraph");
1243        };
1244        assert_eq!(
1245            paragraph[0],
1246            MarkdownParagraphChunk::Image(Image {
1247                source_range: 0..111,
1248                link: Link::Web {
1249                    url: "https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png".to_string(),
1250                },
1251                alt_text: Some("test".into()),
1252                height: None,
1253                width: None,
1254            },)
1255        );
1256    }
1257
1258    #[gpui::test]
1259    async fn test_image_without_alt_text() {
1260        let parsed = parse("![](http://example.com/foo.png)").await;
1261
1262        let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1263            text
1264        } else {
1265            panic!("Expected a paragraph");
1266        };
1267        assert_eq!(
1268            paragraph[0],
1269            MarkdownParagraphChunk::Image(Image {
1270                source_range: 0..31,
1271                link: Link::Web {
1272                    url: "http://example.com/foo.png".to_string(),
1273                },
1274                alt_text: None,
1275                height: None,
1276                width: None,
1277            },)
1278        );
1279    }
1280
1281    #[gpui::test]
1282    async fn test_image_with_alt_text_containing_formatting() {
1283        let parsed = parse("![foo *bar* baz](http://example.com/foo.png)").await;
1284
1285        let ParsedMarkdownElement::Paragraph(chunks) = &parsed.children[0] else {
1286            panic!("Expected a paragraph");
1287        };
1288        assert_eq!(
1289            chunks,
1290            &[MarkdownParagraphChunk::Image(Image {
1291                source_range: 0..44,
1292                link: Link::Web {
1293                    url: "http://example.com/foo.png".to_string(),
1294                },
1295                alt_text: Some("foo bar baz".into()),
1296                height: None,
1297                width: None,
1298            }),],
1299        );
1300    }
1301
1302    #[gpui::test]
1303    async fn test_images_with_text_in_between() {
1304        let parsed = parse(
1305            "![foo](http://example.com/foo.png)\nLorem Ipsum\n![bar](http://example.com/bar.png)",
1306        )
1307        .await;
1308
1309        let chunks = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1310            text
1311        } else {
1312            panic!("Expected a paragraph");
1313        };
1314        assert_eq!(
1315            chunks,
1316            &vec![
1317                MarkdownParagraphChunk::Image(Image {
1318                    source_range: 0..81,
1319                    link: Link::Web {
1320                        url: "http://example.com/foo.png".to_string(),
1321                    },
1322                    alt_text: Some("foo".into()),
1323                    height: None,
1324                    width: None,
1325                }),
1326                MarkdownParagraphChunk::Text(ParsedMarkdownText {
1327                    source_range: 0..81,
1328                    contents: " Lorem Ipsum ".to_string(),
1329                    highlights: Vec::new(),
1330                    region_ranges: Vec::new(),
1331                    regions: Vec::new(),
1332                }),
1333                MarkdownParagraphChunk::Image(Image {
1334                    source_range: 0..81,
1335                    link: Link::Web {
1336                        url: "http://example.com/bar.png".to_string(),
1337                    },
1338                    alt_text: Some("bar".into()),
1339                    height: None,
1340                    width: None,
1341                })
1342            ]
1343        );
1344    }
1345
1346    #[test]
1347    fn test_parse_length() {
1348        // Test percentage values
1349        assert_eq!(
1350            MarkdownParser::parse_length("50%"),
1351            Some(DefiniteLength::Fraction(0.5))
1352        );
1353        assert_eq!(
1354            MarkdownParser::parse_length("100%"),
1355            Some(DefiniteLength::Fraction(1.0))
1356        );
1357        assert_eq!(
1358            MarkdownParser::parse_length("25%"),
1359            Some(DefiniteLength::Fraction(0.25))
1360        );
1361        assert_eq!(
1362            MarkdownParser::parse_length("0%"),
1363            Some(DefiniteLength::Fraction(0.0))
1364        );
1365
1366        // Test pixel values
1367        assert_eq!(
1368            MarkdownParser::parse_length("100px"),
1369            Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.0))))
1370        );
1371        assert_eq!(
1372            MarkdownParser::parse_length("50px"),
1373            Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(50.0))))
1374        );
1375        assert_eq!(
1376            MarkdownParser::parse_length("0px"),
1377            Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(0.0))))
1378        );
1379
1380        // Test values without units (should be treated as pixels)
1381        assert_eq!(
1382            MarkdownParser::parse_length("100"),
1383            Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.0))))
1384        );
1385        assert_eq!(
1386            MarkdownParser::parse_length("42"),
1387            Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(42.0))))
1388        );
1389
1390        // Test invalid values
1391        assert_eq!(MarkdownParser::parse_length("invalid"), None);
1392        assert_eq!(MarkdownParser::parse_length("px"), None);
1393        assert_eq!(MarkdownParser::parse_length("%"), None);
1394        assert_eq!(MarkdownParser::parse_length(""), None);
1395        assert_eq!(MarkdownParser::parse_length("abc%"), None);
1396        assert_eq!(MarkdownParser::parse_length("abcpx"), None);
1397
1398        // Test decimal values
1399        assert_eq!(
1400            MarkdownParser::parse_length("50.5%"),
1401            Some(DefiniteLength::Fraction(0.505))
1402        );
1403        assert_eq!(
1404            MarkdownParser::parse_length("100.25px"),
1405            Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.25))))
1406        );
1407        assert_eq!(
1408            MarkdownParser::parse_length("42.0"),
1409            Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(42.0))))
1410        );
1411    }
1412
1413    #[gpui::test]
1414    async fn test_html_table() {
1415        let parsed = parse(
1416            "<table>
1417          <thead>
1418            <tr>
1419              <th>Id</th>
1420              <th>Name</th>
1421            </tr>
1422          </thead>
1423          <tbody>
1424            <tr>
1425              <td>1</td>
1426              <td>Chris</td>
1427            </tr>
1428            <tr>
1429              <td>2</td>
1430              <td>Dennis</td>
1431            </tr>
1432          </tbody>
1433        </table>",
1434        )
1435        .await;
1436
1437        assert_eq!(
1438            ParsedMarkdown {
1439                children: vec![ParsedMarkdownElement::Table(table(
1440                    0..366,
1441                    row(vec![text("Id", 0..366), text("Name ", 0..366)]),
1442                    vec![
1443                        row(vec![text("1", 0..366), text("Chris", 0..366)]),
1444                        row(vec![text("2", 0..366), text("Dennis", 0..366)]),
1445                    ],
1446                ))],
1447            },
1448            parsed
1449        );
1450    }
1451
1452    #[gpui::test]
1453    async fn test_html_table_without_headings() {
1454        let parsed = parse(
1455            "<table>
1456          <tbody>
1457            <tr>
1458              <td>1</td>
1459              <td>Chris</td>
1460            </tr>
1461            <tr>
1462              <td>2</td>
1463              <td>Dennis</td>
1464            </tr>
1465          </tbody>
1466        </table>",
1467        )
1468        .await;
1469
1470        assert_eq!(
1471            ParsedMarkdown {
1472                children: vec![ParsedMarkdownElement::Table(table(
1473                    0..240,
1474                    row(vec![]),
1475                    vec![
1476                        row(vec![text("1", 0..240), text("Chris", 0..240)]),
1477                        row(vec![text("2", 0..240), text("Dennis", 0..240)]),
1478                    ],
1479                ))],
1480            },
1481            parsed
1482        );
1483    }
1484
1485    #[gpui::test]
1486    async fn test_html_table_without_body() {
1487        let parsed = parse(
1488            "<table>
1489          <thead>
1490            <tr>
1491              <th>Id</th>
1492              <th>Name</th>
1493            </tr>
1494          </thead>
1495        </table>",
1496        )
1497        .await;
1498
1499        assert_eq!(
1500            ParsedMarkdown {
1501                children: vec![ParsedMarkdownElement::Table(table(
1502                    0..150,
1503                    row(vec![text("Id", 0..150), text("Name", 0..150)]),
1504                    vec![],
1505                ))],
1506            },
1507            parsed
1508        );
1509    }
1510
1511    #[gpui::test]
1512    async fn test_html_heading_tags() {
1513        let parsed = parse("<h1>Heading</h1><h2>Heading</h2><h3>Heading</h3><h4>Heading</h4><h5>Heading</h5><h6>Heading</h6>").await;
1514
1515        assert_eq!(
1516            ParsedMarkdown {
1517                children: vec![
1518                    ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1519                        level: HeadingLevel::H1,
1520                        source_range: 0..96,
1521                        contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1522                            source_range: 0..96,
1523                            contents: "Heading".into(),
1524                            highlights: Vec::default(),
1525                            region_ranges: Vec::default(),
1526                            regions: Vec::default()
1527                        })],
1528                    }),
1529                    ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1530                        level: HeadingLevel::H2,
1531                        source_range: 0..96,
1532                        contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1533                            source_range: 0..96,
1534                            contents: "Heading".into(),
1535                            highlights: Vec::default(),
1536                            region_ranges: Vec::default(),
1537                            regions: Vec::default()
1538                        })],
1539                    }),
1540                    ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1541                        level: HeadingLevel::H3,
1542                        source_range: 0..96,
1543                        contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1544                            source_range: 0..96,
1545                            contents: "Heading".into(),
1546                            highlights: Vec::default(),
1547                            region_ranges: Vec::default(),
1548                            regions: Vec::default()
1549                        })],
1550                    }),
1551                    ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1552                        level: HeadingLevel::H4,
1553                        source_range: 0..96,
1554                        contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1555                            source_range: 0..96,
1556                            contents: "Heading".into(),
1557                            highlights: Vec::default(),
1558                            region_ranges: Vec::default(),
1559                            regions: Vec::default()
1560                        })],
1561                    }),
1562                    ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1563                        level: HeadingLevel::H5,
1564                        source_range: 0..96,
1565                        contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1566                            source_range: 0..96,
1567                            contents: "Heading".into(),
1568                            highlights: Vec::default(),
1569                            region_ranges: Vec::default(),
1570                            regions: Vec::default()
1571                        })],
1572                    }),
1573                    ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1574                        level: HeadingLevel::H6,
1575                        source_range: 0..96,
1576                        contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1577                            source_range: 0..96,
1578                            contents: "Heading".into(),
1579                            highlights: Vec::default(),
1580                            region_ranges: Vec::default(),
1581                            regions: Vec::default()
1582                        })],
1583                    }),
1584                ],
1585            },
1586            parsed
1587        );
1588    }
1589
1590    #[gpui::test]
1591    async fn test_html_image_tag() {
1592        let parsed = parse("<img src=\"http://example.com/foo.png\" />").await;
1593
1594        let ParsedMarkdownElement::Image(image) = &parsed.children[0] else {
1595            panic!("Expected a image element");
1596        };
1597        assert_eq!(
1598            image.clone(),
1599            Image {
1600                source_range: 0..40,
1601                link: Link::Web {
1602                    url: "http://example.com/foo.png".to_string(),
1603                },
1604                alt_text: None,
1605                height: None,
1606                width: None,
1607            },
1608        );
1609    }
1610
1611    #[gpui::test]
1612    async fn test_html_image_tag_with_alt_text() {
1613        let parsed = parse("<img src=\"http://example.com/foo.png\" alt=\"Foo\" />").await;
1614
1615        let ParsedMarkdownElement::Image(image) = &parsed.children[0] else {
1616            panic!("Expected a image element");
1617        };
1618        assert_eq!(
1619            image.clone(),
1620            Image {
1621                source_range: 0..50,
1622                link: Link::Web {
1623                    url: "http://example.com/foo.png".to_string(),
1624                },
1625                alt_text: Some("Foo".into()),
1626                height: None,
1627                width: None,
1628            },
1629        );
1630    }
1631
1632    #[gpui::test]
1633    async fn test_html_image_tag_with_height_and_width() {
1634        let parsed =
1635            parse("<img src=\"http://example.com/foo.png\" height=\"100\" width=\"200\" />").await;
1636
1637        let ParsedMarkdownElement::Image(image) = &parsed.children[0] else {
1638            panic!("Expected a image element");
1639        };
1640        assert_eq!(
1641            image.clone(),
1642            Image {
1643                source_range: 0..65,
1644                link: Link::Web {
1645                    url: "http://example.com/foo.png".to_string(),
1646                },
1647                alt_text: None,
1648                height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
1649                width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
1650            },
1651        );
1652    }
1653
1654    #[gpui::test]
1655    async fn test_html_image_style_tag_with_height_and_width() {
1656        let parsed = parse(
1657            "<img src=\"http://example.com/foo.png\" style=\"height:100px; width:200px;\" />",
1658        )
1659        .await;
1660
1661        let ParsedMarkdownElement::Image(image) = &parsed.children[0] else {
1662            panic!("Expected a image element");
1663        };
1664        assert_eq!(
1665            image.clone(),
1666            Image {
1667                source_range: 0..75,
1668                link: Link::Web {
1669                    url: "http://example.com/foo.png".to_string(),
1670                },
1671                alt_text: None,
1672                height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
1673                width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
1674            },
1675        );
1676    }
1677
1678    #[gpui::test]
1679    async fn test_header_only_table() {
1680        let markdown = "\
1681| Header 1 | Header 2 |
1682|----------|----------|
1683
1684Some other content
1685";
1686
1687        let expected_table = table(
1688            0..48,
1689            row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1690            vec![],
1691        );
1692
1693        assert_eq!(
1694            parse(markdown).await.children[0],
1695            ParsedMarkdownElement::Table(expected_table)
1696        );
1697    }
1698
1699    #[gpui::test]
1700    async fn test_basic_table() {
1701        let markdown = "\
1702| Header 1 | Header 2 |
1703|----------|----------|
1704| Cell 1   | Cell 2   |
1705| Cell 3   | Cell 4   |";
1706
1707        let expected_table = table(
1708            0..95,
1709            row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1710            vec![
1711                row(vec![text("Cell 1", 49..59), text("Cell 2", 60..70)]),
1712                row(vec![text("Cell 3", 73..83), text("Cell 4", 84..94)]),
1713            ],
1714        );
1715
1716        assert_eq!(
1717            parse(markdown).await.children[0],
1718            ParsedMarkdownElement::Table(expected_table)
1719        );
1720    }
1721
1722    #[gpui::test]
1723    async fn test_list_basic() {
1724        let parsed = parse(
1725            "\
1726* Item 1
1727* Item 2
1728* Item 3
1729",
1730        )
1731        .await;
1732
1733        assert_eq!(
1734            parsed.children,
1735            vec![
1736                list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1737                list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1738                list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
1739            ],
1740        );
1741    }
1742
1743    #[gpui::test]
1744    async fn test_list_with_tasks() {
1745        let parsed = parse(
1746            "\
1747- [ ] TODO
1748- [x] Checked
1749",
1750        )
1751        .await;
1752
1753        assert_eq!(
1754            parsed.children,
1755            vec![
1756                list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1757                list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
1758            ],
1759        );
1760    }
1761
1762    #[gpui::test]
1763    async fn test_list_with_indented_task() {
1764        let parsed = parse(
1765            "\
1766- [ ] TODO
1767  - [x] Checked
1768  - Unordered
1769  1. Number 1
1770  1. Number 2
17711. Number A
1772",
1773        )
1774        .await;
1775
1776        assert_eq!(
1777            parsed.children,
1778            vec![
1779                list_item(0..12, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1780                list_item(13..26, 2, Task(true, 15..18), vec![p("Checked", 19..26)]),
1781                list_item(29..40, 2, Unordered, vec![p("Unordered", 31..40)]),
1782                list_item(43..54, 2, Ordered(1), vec![p("Number 1", 46..54)]),
1783                list_item(57..68, 2, Ordered(2), vec![p("Number 2", 60..68)]),
1784                list_item(69..80, 1, Ordered(1), vec![p("Number A", 72..80)]),
1785            ],
1786        );
1787    }
1788
1789    #[gpui::test]
1790    async fn test_list_with_linebreak_is_handled_correctly() {
1791        let parsed = parse(
1792            "\
1793- [ ] Task 1
1794
1795- [x] Task 2
1796",
1797        )
1798        .await;
1799
1800        assert_eq!(
1801            parsed.children,
1802            vec![
1803                list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
1804                list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
1805            ],
1806        );
1807    }
1808
1809    #[gpui::test]
1810    async fn test_list_nested() {
1811        let parsed = parse(
1812            "\
1813* Item 1
1814* Item 2
1815* Item 3
1816
18171. Hello
18181. Two
1819   1. Three
18202. Four
18213. Five
1822
1823* First
1824  1. Hello
1825     1. Goodbyte
1826        - Inner
1827        - Inner
1828  2. Goodbyte
1829        - Next item empty
1830        -
1831* Last
1832",
1833        )
1834        .await;
1835
1836        assert_eq!(
1837            parsed.children,
1838            vec![
1839                list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1840                list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1841                list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
1842                list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
1843                list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
1844                list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
1845                list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
1846                list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
1847                list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
1848                list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
1849                list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
1850                list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
1851                list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
1852                list_item(143..159, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
1853                list_item(160..180, 3, Unordered, vec![p("Next item empty", 165..180)]),
1854                list_item(186..190, 3, Unordered, vec![]),
1855                list_item(191..197, 1, Unordered, vec![p("Last", 193..197)]),
1856            ]
1857        );
1858    }
1859
1860    #[gpui::test]
1861    async fn test_list_with_nested_content() {
1862        let parsed = parse(
1863            "\
1864*   This is a list item with two paragraphs.
1865
1866    This is the second paragraph in the list item.
1867",
1868        )
1869        .await;
1870
1871        assert_eq!(
1872            parsed.children,
1873            vec![list_item(
1874                0..96,
1875                1,
1876                Unordered,
1877                vec![
1878                    p("This is a list item with two paragraphs.", 4..44),
1879                    p("This is the second paragraph in the list item.", 50..97)
1880                ],
1881            ),],
1882        );
1883    }
1884
1885    #[gpui::test]
1886    async fn test_list_item_with_inline_html() {
1887        let parsed = parse(
1888            "\
1889*   This is a list item with an inline HTML <sometag>tag</sometag>.
1890",
1891        )
1892        .await;
1893
1894        assert_eq!(
1895            parsed.children,
1896            vec![list_item(
1897                0..67,
1898                1,
1899                Unordered,
1900                vec![p("This is a list item with an inline HTML tag.", 4..44),],
1901            ),],
1902        );
1903    }
1904
1905    #[gpui::test]
1906    async fn test_nested_list_with_paragraph_inside() {
1907        let parsed = parse(
1908            "\
19091. a
1910    1. b
1911        1. c
1912
1913    text
1914
1915    1. d
1916",
1917        )
1918        .await;
1919
1920        assert_eq!(
1921            parsed.children,
1922            vec![
1923                list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
1924                list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
1925                list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
1926                p("text", 32..37),
1927                list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
1928            ],
1929        );
1930    }
1931
1932    #[gpui::test]
1933    async fn test_list_with_leading_text() {
1934        let parsed = parse(
1935            "\
1936* `code`
1937* **bold**
1938* [link](https://example.com)
1939",
1940        )
1941        .await;
1942
1943        assert_eq!(
1944            parsed.children,
1945            vec![
1946                list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
1947                list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
1948                list_item(20..49, 1, Unordered, vec![p("link", 22..49)],),
1949            ],
1950        );
1951    }
1952
1953    #[gpui::test]
1954    async fn test_simple_block_quote() {
1955        let parsed = parse("> Simple block quote with **styled text**").await;
1956
1957        assert_eq!(
1958            parsed.children,
1959            vec![block_quote(
1960                vec![p("Simple block quote with styled text", 2..41)],
1961                0..41
1962            )]
1963        );
1964    }
1965
1966    #[gpui::test]
1967    async fn test_simple_block_quote_with_multiple_lines() {
1968        let parsed = parse(
1969            "\
1970> # Heading
1971> More
1972> text
1973>
1974> More text
1975",
1976        )
1977        .await;
1978
1979        assert_eq!(
1980            parsed.children,
1981            vec![block_quote(
1982                vec![
1983                    h1(text("Heading", 4..11), 2..12),
1984                    p("More text", 14..26),
1985                    p("More text", 30..40)
1986                ],
1987                0..40
1988            )]
1989        );
1990    }
1991
1992    #[gpui::test]
1993    async fn test_nested_block_quote() {
1994        let parsed = parse(
1995            "\
1996> A
1997>
1998> > # B
1999>
2000> C
2001
2002More text
2003",
2004        )
2005        .await;
2006
2007        assert_eq!(
2008            parsed.children,
2009            vec![
2010                block_quote(
2011                    vec![
2012                        p("A", 2..4),
2013                        block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
2014                        p("C", 18..20)
2015                    ],
2016                    0..20
2017                ),
2018                p("More text", 21..31)
2019            ]
2020        );
2021    }
2022
2023    #[gpui::test]
2024    async fn test_code_block() {
2025        let parsed = parse(
2026            "\
2027```
2028fn main() {
2029    return 0;
2030}
2031```
2032",
2033        )
2034        .await;
2035
2036        assert_eq!(
2037            parsed.children,
2038            vec![code_block(
2039                None,
2040                "fn main() {\n    return 0;\n}",
2041                0..35,
2042                None
2043            )]
2044        );
2045    }
2046
2047    #[gpui::test]
2048    async fn test_code_block_with_language(executor: BackgroundExecutor) {
2049        let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
2050        language_registry.add(rust_lang());
2051
2052        let parsed = parse_markdown(
2053            "\
2054```rust
2055fn main() {
2056    return 0;
2057}
2058```
2059",
2060            None,
2061            Some(language_registry),
2062        )
2063        .await;
2064
2065        assert_eq!(
2066            parsed.children,
2067            vec![code_block(
2068                Some("rust".to_string()),
2069                "fn main() {\n    return 0;\n}",
2070                0..39,
2071                Some(vec![])
2072            )]
2073        );
2074    }
2075
2076    fn rust_lang() -> Arc<Language> {
2077        Arc::new(Language::new(
2078            LanguageConfig {
2079                name: "Rust".into(),
2080                matcher: LanguageMatcher {
2081                    path_suffixes: vec!["rs".into()],
2082                    ..Default::default()
2083                },
2084                collapsed_placeholder: " /* ... */ ".to_string(),
2085                ..Default::default()
2086            },
2087            Some(tree_sitter_rust::LANGUAGE.into()),
2088        ))
2089    }
2090
2091    fn h1(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
2092        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2093            source_range,
2094            level: HeadingLevel::H1,
2095            contents,
2096        })
2097    }
2098
2099    fn h2(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
2100        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2101            source_range,
2102            level: HeadingLevel::H2,
2103            contents,
2104        })
2105    }
2106
2107    fn h3(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
2108        ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2109            source_range,
2110            level: HeadingLevel::H3,
2111            contents,
2112        })
2113    }
2114
2115    fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
2116        ParsedMarkdownElement::Paragraph(text(contents, source_range))
2117    }
2118
2119    fn text(contents: &str, source_range: Range<usize>) -> MarkdownParagraph {
2120        vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2121            highlights: Vec::new(),
2122            region_ranges: Vec::new(),
2123            regions: Vec::new(),
2124            source_range,
2125            contents: contents.to_string(),
2126        })]
2127    }
2128
2129    fn block_quote(
2130        children: Vec<ParsedMarkdownElement>,
2131        source_range: Range<usize>,
2132    ) -> ParsedMarkdownElement {
2133        ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
2134            source_range,
2135            children,
2136        })
2137    }
2138
2139    fn code_block(
2140        language: Option<String>,
2141        code: &str,
2142        source_range: Range<usize>,
2143        highlights: Option<Vec<(Range<usize>, HighlightId)>>,
2144    ) -> ParsedMarkdownElement {
2145        ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
2146            source_range,
2147            language,
2148            contents: code.to_string().into(),
2149            highlights,
2150        })
2151    }
2152
2153    fn list_item(
2154        source_range: Range<usize>,
2155        depth: u16,
2156        item_type: ParsedMarkdownListItemType,
2157        content: Vec<ParsedMarkdownElement>,
2158    ) -> ParsedMarkdownElement {
2159        ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
2160            source_range,
2161            item_type,
2162            depth,
2163            content,
2164        })
2165    }
2166
2167    fn table(
2168        source_range: Range<usize>,
2169        header: ParsedMarkdownTableRow,
2170        body: Vec<ParsedMarkdownTableRow>,
2171    ) -> ParsedMarkdownTable {
2172        ParsedMarkdownTable {
2173            column_alignments: Vec::new(),
2174            source_range,
2175            header,
2176            body,
2177        }
2178    }
2179
2180    fn row(children: Vec<MarkdownParagraph>) -> ParsedMarkdownTableRow {
2181        ParsedMarkdownTableRow { children }
2182    }
2183
2184    impl PartialEq for ParsedMarkdownTable {
2185        fn eq(&self, other: &Self) -> bool {
2186            self.source_range == other.source_range
2187                && self.header == other.header
2188                && self.body == other.body
2189        }
2190    }
2191
2192    impl PartialEq for ParsedMarkdownText {
2193        fn eq(&self, other: &Self) -> bool {
2194            self.source_range == other.source_range && self.contents == other.contents
2195        }
2196    }
2197}