parser.rs

  1use gpui::SharedString;
  2use linkify::LinkFinder;
  3pub use pulldown_cmark::TagEnd as MarkdownTagEnd;
  4use pulldown_cmark::{Alignment, HeadingLevel, LinkType, MetadataBlockKind, Options, Parser};
  5use std::ops::Range;
  6
  7pub fn parse_markdown(text: &str) -> Vec<(Range<usize>, MarkdownEvent)> {
  8    let mut events = Vec::new();
  9    let mut within_link = false;
 10    let mut within_metadata = false;
 11    for (pulldown_event, mut range) in Parser::new_ext(text, Options::all()).into_offset_iter() {
 12        if within_metadata {
 13            if let pulldown_cmark::Event::End(pulldown_cmark::TagEnd::MetadataBlock { .. }) =
 14                pulldown_event
 15            {
 16                within_metadata = false;
 17            }
 18            continue;
 19        }
 20        match pulldown_event {
 21            pulldown_cmark::Event::Start(tag) => {
 22                match tag {
 23                    pulldown_cmark::Tag::Link { .. } => within_link = true,
 24                    pulldown_cmark::Tag::MetadataBlock { .. } => within_metadata = true,
 25                    _ => {}
 26                }
 27                events.push((range, MarkdownEvent::Start(tag.into())))
 28            }
 29            pulldown_cmark::Event::End(tag) => {
 30                if let pulldown_cmark::TagEnd::Link = tag {
 31                    within_link = false;
 32                }
 33                events.push((range, MarkdownEvent::End(tag)));
 34            }
 35            pulldown_cmark::Event::Text(_) => {
 36                // Automatically detect links in text if we're not already within a markdown
 37                // link.
 38                if !within_link {
 39                    let mut finder = LinkFinder::new();
 40                    finder.kinds(&[linkify::LinkKind::Url]);
 41                    let text_range = range.clone();
 42                    for link in finder.links(&text[text_range.clone()]) {
 43                        let link_range =
 44                            text_range.start + link.start()..text_range.start + link.end();
 45
 46                        if link_range.start > range.start {
 47                            events.push((range.start..link_range.start, MarkdownEvent::Text));
 48                        }
 49
 50                        events.push((
 51                            link_range.clone(),
 52                            MarkdownEvent::Start(MarkdownTag::Link {
 53                                link_type: LinkType::Autolink,
 54                                dest_url: SharedString::from(link.as_str().to_string()),
 55                                title: SharedString::default(),
 56                                id: SharedString::default(),
 57                            }),
 58                        ));
 59                        events.push((link_range.clone(), MarkdownEvent::Text));
 60                        events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
 61
 62                        range.start = link_range.end;
 63                    }
 64                }
 65
 66                if range.start < range.end {
 67                    events.push((range, MarkdownEvent::Text));
 68                }
 69            }
 70            pulldown_cmark::Event::Code(_) => {
 71                range.start += 1;
 72                range.end -= 1;
 73                events.push((range, MarkdownEvent::Code))
 74            }
 75            pulldown_cmark::Event::Html(_) => events.push((range, MarkdownEvent::Html)),
 76            pulldown_cmark::Event::InlineHtml(_) => events.push((range, MarkdownEvent::InlineHtml)),
 77            pulldown_cmark::Event::FootnoteReference(_) => {
 78                events.push((range, MarkdownEvent::FootnoteReference))
 79            }
 80            pulldown_cmark::Event::SoftBreak => events.push((range, MarkdownEvent::SoftBreak)),
 81            pulldown_cmark::Event::HardBreak => events.push((range, MarkdownEvent::HardBreak)),
 82            pulldown_cmark::Event::Rule => events.push((range, MarkdownEvent::Rule)),
 83            pulldown_cmark::Event::TaskListMarker(checked) => {
 84                events.push((range, MarkdownEvent::TaskListMarker(checked)))
 85            }
 86        }
 87    }
 88    events
 89}
 90
/// A static-lifetime equivalent of pulldown_cmark::Event so we can cache the
/// parse result for rendering without resorting to unsafe lifetime coercion.
///
/// Leaf variants such as `Text` and `Code` carry no content of their own;
/// `parse_markdown` pairs every event with the byte range of the source text
/// that it covers.
#[derive(Clone, Debug, PartialEq)]
pub enum MarkdownEvent {
    /// Start of a tagged element. Events that are yielded after this event
    /// and before its corresponding `End` event are inside this element.
    /// Start and end events are guaranteed to be balanced.
    Start(MarkdownTag),
    /// End of a tagged element.
    End(MarkdownTagEnd),
    /// A text node.
    Text,
    /// An inline code node.
    Code,
    /// An HTML node.
    Html,
    /// An inline HTML node.
    InlineHtml,
    /// A reference to a footnote, which may or may not be defined
    /// by an event with a `MarkdownTag::FootnoteDefinition` tag. Definitions and
    /// references to them may occur in any order.
    FootnoteReference,
    /// A soft line break.
    SoftBreak,
    /// A hard line break.
    HardBreak,
    /// A horizontal ruler.
    Rule,
    /// A task list marker, rendered as a checkbox in HTML. Contains `true` when it is checked.
    TaskListMarker(bool),
}
122
/// Tags for elements that can contain other elements.
///
/// This is the owned counterpart of `pulldown_cmark::Tag`: every string payload
/// is stored as a `SharedString` instead of a borrowed string.
#[derive(Clone, Debug, PartialEq)]
pub enum MarkdownTag {
    /// A paragraph of text and other inline elements.
    Paragraph,

    /// A heading, with optional identifier, classes and custom attributes.
    /// The identifier is prefixed with `#` and the last one in the attributes
    /// list is chosen, classes are prefixed with `.` and custom attributes
    /// have no prefix and can optionally have a value (`myattr` or `myattr=myvalue`).
    Heading {
        level: HeadingLevel,
        id: Option<SharedString>,
        classes: Vec<SharedString>,
        /// The first item of the tuple is the attr and second one the value.
        attrs: Vec<(SharedString, Option<SharedString>)>,
    },

    /// A block quote.
    BlockQuote,

    /// A code block.
    CodeBlock(CodeBlockKind),

    /// An HTML block.
    HtmlBlock,

    /// A list. If the list is ordered the field indicates the number of the first item.
    /// Contains only list items.
    List(Option<u64>), // TODO: add delim and tight for ast (not needed for html)

    /// A list item.
    Item,

    /// A footnote definition. The value contained is the footnote's label by which it can
    /// be referred to.
    FootnoteDefinition(SharedString),

    /// A table. Contains a vector describing the text-alignment for each of its columns.
    Table(Vec<Alignment>),

    /// A table header. Contains only `TableCell`s. Note that the table body starts immediately
    /// after the closure of the `TableHead` tag. There is no `TableBody` tag.
    TableHead,

    /// A table row. Is used both for header rows and body rows. Contains only `TableCell`s.
    TableRow,
    /// A single cell of a table row or table header.
    TableCell,

    // span-level tags
    /// Emphasized text.
    Emphasis,
    /// Strongly emphasized text.
    Strong,
    /// Struck-through text.
    Strikethrough,

    /// A link.
    Link {
        link_type: LinkType,
        dest_url: SharedString,
        title: SharedString,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: SharedString,
    },

    /// An image, described by its link type, destination URL, title and
    /// link identifier.
    Image {
        link_type: LinkType,
        dest_url: SharedString,
        title: SharedString,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: SharedString,
    },

    /// A metadata block.
    MetadataBlock(MetadataBlockKind),
}
198
/// The kind of a code block; the owned counterpart of
/// `pulldown_cmark::CodeBlockKind`.
#[derive(Clone, Debug, PartialEq)]
pub enum CodeBlockKind {
    /// An indented code block, which carries no language information.
    Indented,
    /// The value contained in the tag describes the language of the code, which may be empty.
    Fenced(SharedString),
}
205
206impl From<pulldown_cmark::Tag<'_>> for MarkdownTag {
207    fn from(tag: pulldown_cmark::Tag) -> Self {
208        match tag {
209            pulldown_cmark::Tag::Paragraph => MarkdownTag::Paragraph,
210            pulldown_cmark::Tag::Heading {
211                level,
212                id,
213                classes,
214                attrs,
215            } => {
216                let id = id.map(|id| SharedString::from(id.into_string()));
217                let classes = classes
218                    .into_iter()
219                    .map(|c| SharedString::from(c.into_string()))
220                    .collect();
221                let attrs = attrs
222                    .into_iter()
223                    .map(|(key, value)| {
224                        (
225                            SharedString::from(key.into_string()),
226                            value.map(|v| SharedString::from(v.into_string())),
227                        )
228                    })
229                    .collect();
230                MarkdownTag::Heading {
231                    level,
232                    id,
233                    classes,
234                    attrs,
235                }
236            }
237            pulldown_cmark::Tag::BlockQuote => MarkdownTag::BlockQuote,
238            pulldown_cmark::Tag::CodeBlock(kind) => match kind {
239                pulldown_cmark::CodeBlockKind::Indented => {
240                    MarkdownTag::CodeBlock(CodeBlockKind::Indented)
241                }
242                pulldown_cmark::CodeBlockKind::Fenced(info) => MarkdownTag::CodeBlock(
243                    CodeBlockKind::Fenced(SharedString::from(info.into_string())),
244                ),
245            },
246            pulldown_cmark::Tag::List(start_number) => MarkdownTag::List(start_number),
247            pulldown_cmark::Tag::Item => MarkdownTag::Item,
248            pulldown_cmark::Tag::FootnoteDefinition(label) => {
249                MarkdownTag::FootnoteDefinition(SharedString::from(label.to_string()))
250            }
251            pulldown_cmark::Tag::Table(alignments) => MarkdownTag::Table(alignments),
252            pulldown_cmark::Tag::TableHead => MarkdownTag::TableHead,
253            pulldown_cmark::Tag::TableRow => MarkdownTag::TableRow,
254            pulldown_cmark::Tag::TableCell => MarkdownTag::TableCell,
255            pulldown_cmark::Tag::Emphasis => MarkdownTag::Emphasis,
256            pulldown_cmark::Tag::Strong => MarkdownTag::Strong,
257            pulldown_cmark::Tag::Strikethrough => MarkdownTag::Strikethrough,
258            pulldown_cmark::Tag::Link {
259                link_type,
260                dest_url,
261                title,
262                id,
263            } => MarkdownTag::Link {
264                link_type,
265                dest_url: SharedString::from(dest_url.into_string()),
266                title: SharedString::from(title.into_string()),
267                id: SharedString::from(id.into_string()),
268            },
269            pulldown_cmark::Tag::Image {
270                link_type,
271                dest_url,
272                title,
273                id,
274            } => MarkdownTag::Image {
275                link_type,
276                dest_url: SharedString::from(dest_url.into_string()),
277                title: SharedString::from(title.into_string()),
278                id: SharedString::from(id.into_string()),
279            },
280            pulldown_cmark::Tag::HtmlBlock => MarkdownTag::HtmlBlock,
281            pulldown_cmark::Tag::MetadataBlock(kind) => MarkdownTag::MetadataBlock(kind),
282        }
283    }
284}