parser.rs

  1use gpui::SharedString;
  2use linkify::LinkFinder;
  3pub use pulldown_cmark::TagEnd as MarkdownTagEnd;
  4use pulldown_cmark::{Alignment, HeadingLevel, LinkType, MetadataBlockKind, Options, Parser};
  5use std::ops::Range;
  6
  7pub fn parse_markdown(text: &str) -> Vec<(Range<usize>, MarkdownEvent)> {
  8    let mut events = Vec::new();
  9    let mut within_link = false;
 10    for (pulldown_event, mut range) in Parser::new_ext(text, Options::all()).into_offset_iter() {
 11        match pulldown_event {
 12            pulldown_cmark::Event::Start(tag) => {
 13                if let pulldown_cmark::Tag::Link { .. } = tag {
 14                    within_link = true;
 15                }
 16                events.push((range, MarkdownEvent::Start(tag.into())))
 17            }
 18            pulldown_cmark::Event::End(tag) => {
 19                if let pulldown_cmark::TagEnd::Link = tag {
 20                    within_link = false;
 21                }
 22                events.push((range, MarkdownEvent::End(tag)));
 23            }
 24            pulldown_cmark::Event::Text(_) => {
 25                // Automatically detect links in text if we're not already within a markdown
 26                // link.
 27                if !within_link {
 28                    let mut finder = LinkFinder::new();
 29                    finder.kinds(&[linkify::LinkKind::Url]);
 30                    let text_range = range.clone();
 31                    for link in finder.links(&text[text_range.clone()]) {
 32                        let link_range =
 33                            text_range.start + link.start()..text_range.start + link.end();
 34
 35                        if link_range.start > range.start {
 36                            events.push((range.start..link_range.start, MarkdownEvent::Text));
 37                        }
 38
 39                        events.push((
 40                            link_range.clone(),
 41                            MarkdownEvent::Start(MarkdownTag::Link {
 42                                link_type: LinkType::Autolink,
 43                                dest_url: SharedString::from(link.as_str().to_string()),
 44                                title: SharedString::default(),
 45                                id: SharedString::default(),
 46                            }),
 47                        ));
 48                        events.push((link_range.clone(), MarkdownEvent::Text));
 49                        events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
 50
 51                        range.start = link_range.end;
 52                    }
 53                }
 54
 55                if range.start < range.end {
 56                    events.push((range, MarkdownEvent::Text));
 57                }
 58            }
 59            pulldown_cmark::Event::Code(_) => {
 60                range.start += 1;
 61                range.end -= 1;
 62                events.push((range, MarkdownEvent::Code))
 63            }
 64            pulldown_cmark::Event::Html(_) => events.push((range, MarkdownEvent::Html)),
 65            pulldown_cmark::Event::InlineHtml(_) => events.push((range, MarkdownEvent::InlineHtml)),
 66            pulldown_cmark::Event::FootnoteReference(_) => {
 67                events.push((range, MarkdownEvent::FootnoteReference))
 68            }
 69            pulldown_cmark::Event::SoftBreak => events.push((range, MarkdownEvent::SoftBreak)),
 70            pulldown_cmark::Event::HardBreak => events.push((range, MarkdownEvent::HardBreak)),
 71            pulldown_cmark::Event::Rule => events.push((range, MarkdownEvent::Rule)),
 72            pulldown_cmark::Event::TaskListMarker(checked) => {
 73                events.push((range, MarkdownEvent::TaskListMarker(checked)))
 74            }
 75        }
 76    }
 77    events
 78}
 79
 80/// A static-lifetime equivalent of pulldown_cmark::Event so we can cache the
 81/// parse result for rendering without resorting to unsafe lifetime coercion.
 82#[derive(Clone, Debug, PartialEq)]
 83pub enum MarkdownEvent {
 84    /// Start of a tagged element. Events that are yielded after this event
 85    /// and before its corresponding `End` event are inside this element.
 86    /// Start and end events are guaranteed to be balanced.
 87    Start(MarkdownTag),
 88    /// End of a tagged element.
 89    End(MarkdownTagEnd),
 90    /// A text node.
 91    Text,
 92    /// An inline code node.
 93    Code,
 94    /// An HTML node.
 95    Html,
 96    /// An inline HTML node.
 97    InlineHtml,
 98    /// A reference to a footnote with given label, which may or may not be defined
 99    /// by an event with a `Tag::FootnoteDefinition` tag. Definitions and references to them may
100    /// occur in any order.
101    FootnoteReference,
102    /// A soft line break.
103    SoftBreak,
104    /// A hard line break.
105    HardBreak,
106    /// A horizontal ruler.
107    Rule,
108    /// A task list marker, rendered as a checkbox in HTML. Contains a true when it is checked.
109    TaskListMarker(bool),
110}
111
112/// Tags for elements that can contain other elements.
113#[derive(Clone, Debug, PartialEq)]
114pub enum MarkdownTag {
115    /// A paragraph of text and other inline elements.
116    Paragraph,
117
118    /// A heading, with optional identifier, classes and custom attributes.
119    /// The identifier is prefixed with `#` and the last one in the attributes
120    /// list is chosen, classes are prefixed with `.` and custom attributes
121    /// have no prefix and can optionally have a value (`myattr` o `myattr=myvalue`).
122    Heading {
123        level: HeadingLevel,
124        id: Option<SharedString>,
125        classes: Vec<SharedString>,
126        /// The first item of the tuple is the attr and second one the value.
127        attrs: Vec<(SharedString, Option<SharedString>)>,
128    },
129
130    BlockQuote,
131
132    /// A code block.
133    CodeBlock(CodeBlockKind),
134
135    /// A HTML block.
136    HtmlBlock,
137
138    /// A list. If the list is ordered the field indicates the number of the first item.
139    /// Contains only list items.
140    List(Option<u64>), // TODO: add delim and tight for ast (not needed for html)
141
142    /// A list item.
143    Item,
144
145    /// A footnote definition. The value contained is the footnote's label by which it can
146    /// be referred to.
147    FootnoteDefinition(SharedString),
148
149    /// A table. Contains a vector describing the text-alignment for each of its columns.
150    Table(Vec<Alignment>),
151
152    /// A table header. Contains only `TableCell`s. Note that the table body starts immediately
153    /// after the closure of the `TableHead` tag. There is no `TableBody` tag.
154    TableHead,
155
156    /// A table row. Is used both for header rows as body rows. Contains only `TableCell`s.
157    TableRow,
158    TableCell,
159
160    // span-level tags
161    Emphasis,
162    Strong,
163    Strikethrough,
164
165    /// A link.
166    Link {
167        link_type: LinkType,
168        dest_url: SharedString,
169        title: SharedString,
170        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
171        id: SharedString,
172    },
173
174    /// An image. The first field is the link type, the second the destination URL and the third is a title,
175    /// the fourth is the link identifier.
176    Image {
177        link_type: LinkType,
178        dest_url: SharedString,
179        title: SharedString,
180        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
181        id: SharedString,
182    },
183
184    /// A metadata block.
185    MetadataBlock(MetadataBlockKind),
186}
187
188#[derive(Clone, Debug, PartialEq)]
189pub enum CodeBlockKind {
190    Indented,
191    /// The value contained in the tag describes the language of the code, which may be empty.
192    Fenced(SharedString),
193}
194
195impl From<pulldown_cmark::Tag<'_>> for MarkdownTag {
196    fn from(tag: pulldown_cmark::Tag) -> Self {
197        match tag {
198            pulldown_cmark::Tag::Paragraph => MarkdownTag::Paragraph,
199            pulldown_cmark::Tag::Heading {
200                level,
201                id,
202                classes,
203                attrs,
204            } => {
205                let id = id.map(|id| SharedString::from(id.into_string()));
206                let classes = classes
207                    .into_iter()
208                    .map(|c| SharedString::from(c.into_string()))
209                    .collect();
210                let attrs = attrs
211                    .into_iter()
212                    .map(|(key, value)| {
213                        (
214                            SharedString::from(key.into_string()),
215                            value.map(|v| SharedString::from(v.into_string())),
216                        )
217                    })
218                    .collect();
219                MarkdownTag::Heading {
220                    level,
221                    id,
222                    classes,
223                    attrs,
224                }
225            }
226            pulldown_cmark::Tag::BlockQuote => MarkdownTag::BlockQuote,
227            pulldown_cmark::Tag::CodeBlock(kind) => match kind {
228                pulldown_cmark::CodeBlockKind::Indented => {
229                    MarkdownTag::CodeBlock(CodeBlockKind::Indented)
230                }
231                pulldown_cmark::CodeBlockKind::Fenced(info) => MarkdownTag::CodeBlock(
232                    CodeBlockKind::Fenced(SharedString::from(info.into_string())),
233                ),
234            },
235            pulldown_cmark::Tag::List(start_number) => MarkdownTag::List(start_number),
236            pulldown_cmark::Tag::Item => MarkdownTag::Item,
237            pulldown_cmark::Tag::FootnoteDefinition(label) => {
238                MarkdownTag::FootnoteDefinition(SharedString::from(label.to_string()))
239            }
240            pulldown_cmark::Tag::Table(alignments) => MarkdownTag::Table(alignments),
241            pulldown_cmark::Tag::TableHead => MarkdownTag::TableHead,
242            pulldown_cmark::Tag::TableRow => MarkdownTag::TableRow,
243            pulldown_cmark::Tag::TableCell => MarkdownTag::TableCell,
244            pulldown_cmark::Tag::Emphasis => MarkdownTag::Emphasis,
245            pulldown_cmark::Tag::Strong => MarkdownTag::Strong,
246            pulldown_cmark::Tag::Strikethrough => MarkdownTag::Strikethrough,
247            pulldown_cmark::Tag::Link {
248                link_type,
249                dest_url,
250                title,
251                id,
252            } => MarkdownTag::Link {
253                link_type,
254                dest_url: SharedString::from(dest_url.into_string()),
255                title: SharedString::from(title.into_string()),
256                id: SharedString::from(id.into_string()),
257            },
258            pulldown_cmark::Tag::Image {
259                link_type,
260                dest_url,
261                title,
262                id,
263            } => MarkdownTag::Image {
264                link_type,
265                dest_url: SharedString::from(dest_url.into_string()),
266                title: SharedString::from(title.into_string()),
267                id: SharedString::from(id.into_string()),
268            },
269            pulldown_cmark::Tag::HtmlBlock => MarkdownTag::HtmlBlock,
270            pulldown_cmark::Tag::MetadataBlock(kind) => MarkdownTag::MetadataBlock(kind),
271        }
272    }
273}