parser.rs

  1use gpui::SharedString;
  2use linkify::LinkFinder;
  3pub use pulldown_cmark::TagEnd as MarkdownTagEnd;
  4use pulldown_cmark::{Alignment, HeadingLevel, LinkType, MetadataBlockKind, Options, Parser};
  5use std::ops::Range;
  6
  7pub fn parse_markdown(text: &str) -> Vec<(Range<usize>, MarkdownEvent)> {
  8    let mut options = Options::all();
  9    options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
 10    options.remove(pulldown_cmark::Options::ENABLE_YAML_STYLE_METADATA_BLOCKS);
 11    options.remove(pulldown_cmark::Options::ENABLE_MATH);
 12
 13    let mut events = Vec::new();
 14    let mut within_link = false;
 15    let mut within_metadata = false;
 16    for (pulldown_event, mut range) in Parser::new_ext(text, options).into_offset_iter() {
 17        if within_metadata {
 18            if let pulldown_cmark::Event::End(pulldown_cmark::TagEnd::MetadataBlock { .. }) =
 19                pulldown_event
 20            {
 21                within_metadata = false;
 22            }
 23            continue;
 24        }
 25        match pulldown_event {
 26            pulldown_cmark::Event::Start(tag) => {
 27                match tag {
 28                    pulldown_cmark::Tag::Link { .. } => within_link = true,
 29                    pulldown_cmark::Tag::MetadataBlock { .. } => within_metadata = true,
 30                    _ => {}
 31                }
 32                events.push((range, MarkdownEvent::Start(tag.into())))
 33            }
 34            pulldown_cmark::Event::End(tag) => {
 35                if let pulldown_cmark::TagEnd::Link = tag {
 36                    within_link = false;
 37                }
 38                events.push((range, MarkdownEvent::End(tag)));
 39            }
 40            pulldown_cmark::Event::Text(_) => {
 41                // Automatically detect links in text if we're not already within a markdown
 42                // link.
 43                if !within_link {
 44                    let mut finder = LinkFinder::new();
 45                    finder.kinds(&[linkify::LinkKind::Url]);
 46                    let text_range = range.clone();
 47                    for link in finder.links(&text[text_range.clone()]) {
 48                        let link_range =
 49                            text_range.start + link.start()..text_range.start + link.end();
 50
 51                        if link_range.start > range.start {
 52                            events.push((range.start..link_range.start, MarkdownEvent::Text));
 53                        }
 54
 55                        events.push((
 56                            link_range.clone(),
 57                            MarkdownEvent::Start(MarkdownTag::Link {
 58                                link_type: LinkType::Autolink,
 59                                dest_url: SharedString::from(link.as_str().to_string()),
 60                                title: SharedString::default(),
 61                                id: SharedString::default(),
 62                            }),
 63                        ));
 64                        events.push((link_range.clone(), MarkdownEvent::Text));
 65                        events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
 66
 67                        range.start = link_range.end;
 68                    }
 69                }
 70
 71                if range.start < range.end {
 72                    events.push((range, MarkdownEvent::Text));
 73                }
 74            }
 75            pulldown_cmark::Event::Code(_) => {
 76                range.start += 1;
 77                range.end -= 1;
 78                events.push((range, MarkdownEvent::Code))
 79            }
 80            pulldown_cmark::Event::Html(_) => events.push((range, MarkdownEvent::Html)),
 81            pulldown_cmark::Event::InlineHtml(_) => events.push((range, MarkdownEvent::InlineHtml)),
 82            pulldown_cmark::Event::FootnoteReference(_) => {
 83                events.push((range, MarkdownEvent::FootnoteReference))
 84            }
 85            pulldown_cmark::Event::SoftBreak => events.push((range, MarkdownEvent::SoftBreak)),
 86            pulldown_cmark::Event::HardBreak => events.push((range, MarkdownEvent::HardBreak)),
 87            pulldown_cmark::Event::Rule => events.push((range, MarkdownEvent::Rule)),
 88            pulldown_cmark::Event::TaskListMarker(checked) => {
 89                events.push((range, MarkdownEvent::TaskListMarker(checked)))
 90            }
 91            pulldown_cmark::Event::InlineMath(_) | pulldown_cmark::Event::DisplayMath(_) => {}
 92        }
 93    }
 94    events
 95}
 96
 97pub fn parse_links_only(text: &str) -> Vec<(Range<usize>, MarkdownEvent)> {
 98    let mut events = Vec::new();
 99    let mut finder = LinkFinder::new();
100    finder.kinds(&[linkify::LinkKind::Url]);
101    let mut text_range = Range {
102        start: 0,
103        end: text.len(),
104    };
105    for link in finder.links(text) {
106        let link_range = link.start()..link.end();
107
108        if link_range.start > text_range.start {
109            events.push((text_range.start..link_range.start, MarkdownEvent::Text));
110        }
111
112        events.push((
113            link_range.clone(),
114            MarkdownEvent::Start(MarkdownTag::Link {
115                link_type: LinkType::Autolink,
116                dest_url: SharedString::from(link.as_str().to_string()),
117                title: SharedString::default(),
118                id: SharedString::default(),
119            }),
120        ));
121        events.push((link_range.clone(), MarkdownEvent::Text));
122        events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
123
124        text_range.start = link_range.end;
125    }
126
127    if text_range.end > text_range.start {
128        events.push((text_range, MarkdownEvent::Text));
129    }
130
131    events
132}
133
/// A static-lifetime equivalent of pulldown_cmark::Event so we can cache the
/// parse result for rendering without resorting to unsafe lifetime coercion.
///
/// Unlike `pulldown_cmark::Event`, the leaf variants carry no text payload:
/// the parser functions in this file pair every event with the byte range of
/// the source text that it covers.
#[derive(Clone, Debug, PartialEq)]
pub enum MarkdownEvent {
    /// Start of a tagged element. Events that are yielded after this event
    /// and before its corresponding `End` event are inside this element.
    /// Start and end events are intended to be balanced.
    ///
    /// NOTE(review): `parse_markdown` emits the `Start` of a metadata block but
    /// skips the matching `End`, so the balance does not hold for metadata
    /// blocks — confirm whether that is intended.
    Start(MarkdownTag),
    /// End of a tagged element.
    End(MarkdownTagEnd),
    /// A text node.
    Text,
    /// An inline code node. `parse_markdown` shrinks the paired range so that
    /// it points inside the span's backtick delimiters.
    Code,
    /// An HTML node.
    Html,
    /// An inline HTML node.
    InlineHtml,
    /// A reference to a footnote with given label, which may or may not be defined
    /// by an event with a `Tag::FootnoteDefinition` tag. Definitions and references to them may
    /// occur in any order.
    FootnoteReference,
    /// A soft line break.
    SoftBreak,
    /// A hard line break.
    HardBreak,
    /// A horizontal ruler.
    Rule,
    /// A task list marker, rendered as a checkbox in HTML. Contains `true` when it is checked.
    TaskListMarker(bool),
}
165
/// Tags for elements that can contain other elements.
///
/// This is the owned counterpart of `pulldown_cmark::Tag`: borrowed strings
/// are stored as `SharedString`s instead.
#[derive(Clone, Debug, PartialEq)]
pub enum MarkdownTag {
    /// A paragraph of text and other inline elements.
    Paragraph,

    /// A heading, with optional identifier, classes and custom attributes.
    /// The identifier is prefixed with `#` and the last one in the attributes
    /// list is chosen, classes are prefixed with `.` and custom attributes
    /// have no prefix and can optionally have a value (`myattr` or `myattr=myvalue`).
    Heading {
        level: HeadingLevel,
        id: Option<SharedString>,
        classes: Vec<SharedString>,
        /// The first item of the tuple is the attr and second one the value.
        attrs: Vec<(SharedString, Option<SharedString>)>,
    },

    /// A block quote. The block-quote kind reported by `pulldown_cmark` is
    /// discarded by the `From` conversion.
    BlockQuote,

    /// A code block.
    CodeBlock(CodeBlockKind),

    /// An HTML block.
    HtmlBlock,

    /// A list. If the list is ordered the field indicates the number of the first item.
    /// Contains only list items.
    List(Option<u64>), // TODO: add delim and tight for ast (not needed for html)

    /// A list item.
    Item,

    /// A footnote definition. The value contained is the footnote's label by which it can
    /// be referred to.
    FootnoteDefinition(SharedString),

    /// A table. Contains a vector describing the text-alignment for each of its columns.
    Table(Vec<Alignment>),

    /// A table header. Contains only `TableCell`s. Note that the table body starts immediately
    /// after the closure of the `TableHead` tag. There is no `TableBody` tag.
    TableHead,

    /// A table row. Used for both header rows and body rows. Contains only `TableCell`s.
    TableRow,
    /// A single cell of a table row.
    TableCell,

    // span-level tags
    /// Emphasized text.
    Emphasis,
    /// Strongly emphasized text.
    Strong,
    /// Struck-through text.
    Strikethrough,

    /// A link.
    Link {
        link_type: LinkType,
        dest_url: SharedString,
        title: SharedString,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: SharedString,
    },

    /// An image. The first field is the link type, the second the destination URL and the third is a title,
    /// the fourth is the link identifier.
    Image {
        link_type: LinkType,
        dest_url: SharedString,
        title: SharedString,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: SharedString,
    },

    /// A metadata block.
    MetadataBlock(MetadataBlockKind),

    /// A definition list. `parse_markdown` disables the definition-list
    /// extension, so it is not expected to produce this or the two variants below.
    DefinitionList,
    /// The term being defined in a definition list.
    DefinitionListTitle,
    /// A definition belonging to the preceding definition-list title.
    DefinitionListDefinition,
}
245
/// How a code block was written in the source; the owned counterpart of
/// `pulldown_cmark::CodeBlockKind`.
#[derive(Clone, Debug, PartialEq)]
pub enum CodeBlockKind {
    /// A code block introduced by indentation; it has no language information.
    Indented,
    /// The value contained in the tag describes the language of the code, which may be empty.
    Fenced(SharedString),
}
252
253impl From<pulldown_cmark::Tag<'_>> for MarkdownTag {
254    fn from(tag: pulldown_cmark::Tag) -> Self {
255        match tag {
256            pulldown_cmark::Tag::Paragraph => MarkdownTag::Paragraph,
257            pulldown_cmark::Tag::Heading {
258                level,
259                id,
260                classes,
261                attrs,
262            } => {
263                let id = id.map(|id| SharedString::from(id.into_string()));
264                let classes = classes
265                    .into_iter()
266                    .map(|c| SharedString::from(c.into_string()))
267                    .collect();
268                let attrs = attrs
269                    .into_iter()
270                    .map(|(key, value)| {
271                        (
272                            SharedString::from(key.into_string()),
273                            value.map(|v| SharedString::from(v.into_string())),
274                        )
275                    })
276                    .collect();
277                MarkdownTag::Heading {
278                    level,
279                    id,
280                    classes,
281                    attrs,
282                }
283            }
284            pulldown_cmark::Tag::BlockQuote(_kind) => MarkdownTag::BlockQuote,
285            pulldown_cmark::Tag::CodeBlock(kind) => match kind {
286                pulldown_cmark::CodeBlockKind::Indented => {
287                    MarkdownTag::CodeBlock(CodeBlockKind::Indented)
288                }
289                pulldown_cmark::CodeBlockKind::Fenced(info) => MarkdownTag::CodeBlock(
290                    CodeBlockKind::Fenced(SharedString::from(info.into_string())),
291                ),
292            },
293            pulldown_cmark::Tag::List(start_number) => MarkdownTag::List(start_number),
294            pulldown_cmark::Tag::Item => MarkdownTag::Item,
295            pulldown_cmark::Tag::FootnoteDefinition(label) => {
296                MarkdownTag::FootnoteDefinition(SharedString::from(label.to_string()))
297            }
298            pulldown_cmark::Tag::Table(alignments) => MarkdownTag::Table(alignments),
299            pulldown_cmark::Tag::TableHead => MarkdownTag::TableHead,
300            pulldown_cmark::Tag::TableRow => MarkdownTag::TableRow,
301            pulldown_cmark::Tag::TableCell => MarkdownTag::TableCell,
302            pulldown_cmark::Tag::Emphasis => MarkdownTag::Emphasis,
303            pulldown_cmark::Tag::Strong => MarkdownTag::Strong,
304            pulldown_cmark::Tag::Strikethrough => MarkdownTag::Strikethrough,
305            pulldown_cmark::Tag::Link {
306                link_type,
307                dest_url,
308                title,
309                id,
310            } => MarkdownTag::Link {
311                link_type,
312                dest_url: SharedString::from(dest_url.into_string()),
313                title: SharedString::from(title.into_string()),
314                id: SharedString::from(id.into_string()),
315            },
316            pulldown_cmark::Tag::Image {
317                link_type,
318                dest_url,
319                title,
320                id,
321            } => MarkdownTag::Image {
322                link_type,
323                dest_url: SharedString::from(dest_url.into_string()),
324                title: SharedString::from(title.into_string()),
325                id: SharedString::from(id.into_string()),
326            },
327            pulldown_cmark::Tag::HtmlBlock => MarkdownTag::HtmlBlock,
328            pulldown_cmark::Tag::MetadataBlock(kind) => MarkdownTag::MetadataBlock(kind),
329            pulldown_cmark::Tag::DefinitionList => MarkdownTag::DefinitionList,
330            pulldown_cmark::Tag::DefinitionListTitle => MarkdownTag::DefinitionListTitle,
331            pulldown_cmark::Tag::DefinitionListDefinition => MarkdownTag::DefinitionListDefinition,
332        }
333    }
334}