parser.rs

  1use gpui::SharedString;
  2use linkify::LinkFinder;
  3pub use pulldown_cmark::TagEnd as MarkdownTagEnd;
  4use pulldown_cmark::{Alignment, HeadingLevel, LinkType, MetadataBlockKind, Options, Parser};
  5use std::ops::Range;
  6
  7pub fn parse_markdown(text: &str) -> Vec<(Range<usize>, MarkdownEvent)> {
  8    let mut options = Options::all();
  9    options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
 10    options.remove(pulldown_cmark::Options::ENABLE_YAML_STYLE_METADATA_BLOCKS);
 11    options.remove(pulldown_cmark::Options::ENABLE_MATH);
 12
 13    let mut events = Vec::new();
 14    let mut within_link = false;
 15    let mut within_metadata = false;
 16    for (pulldown_event, mut range) in Parser::new_ext(text, options).into_offset_iter() {
 17        if within_metadata {
 18            if let pulldown_cmark::Event::End(pulldown_cmark::TagEnd::MetadataBlock { .. }) =
 19                pulldown_event
 20            {
 21                within_metadata = false;
 22            }
 23            continue;
 24        }
 25        match pulldown_event {
 26            pulldown_cmark::Event::Start(tag) => {
 27                match tag {
 28                    pulldown_cmark::Tag::Link { .. } => within_link = true,
 29                    pulldown_cmark::Tag::MetadataBlock { .. } => within_metadata = true,
 30                    _ => {}
 31                }
 32                events.push((range, MarkdownEvent::Start(tag.into())))
 33            }
 34            pulldown_cmark::Event::End(tag) => {
 35                if let pulldown_cmark::TagEnd::Link = tag {
 36                    within_link = false;
 37                }
 38                events.push((range, MarkdownEvent::End(tag)));
 39            }
 40            pulldown_cmark::Event::Text(parsed) => {
 41                // Automatically detect links in text if we're not already within a markdown
 42                // link.
 43                let mut parsed = parsed.as_ref();
 44                if !within_link {
 45                    let mut finder = LinkFinder::new();
 46                    finder.kinds(&[linkify::LinkKind::Url]);
 47                    let text_range = range.clone();
 48                    for link in finder.links(&text[text_range.clone()]) {
 49                        let link_range =
 50                            text_range.start + link.start()..text_range.start + link.end();
 51
 52                        if link_range.start > range.start {
 53                            let (text, tail) = parsed.split_at(link_range.start - range.start);
 54                            events.push((
 55                                range.start..link_range.start,
 56                                MarkdownEvent::Text(SharedString::new(text)),
 57                            ));
 58                            parsed = tail;
 59                        }
 60
 61                        events.push((
 62                            link_range.clone(),
 63                            MarkdownEvent::Start(MarkdownTag::Link {
 64                                link_type: LinkType::Autolink,
 65                                dest_url: SharedString::from(link.as_str().to_string()),
 66                                title: SharedString::default(),
 67                                id: SharedString::default(),
 68                            }),
 69                        ));
 70
 71                        let (link_text, tail) = parsed.split_at(link_range.end - link_range.start);
 72                        events.push((
 73                            link_range.clone(),
 74                            MarkdownEvent::Text(SharedString::new(link_text)),
 75                        ));
 76                        events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
 77
 78                        range.start = link_range.end;
 79                        parsed = tail;
 80                    }
 81                }
 82                if range.start < range.end {
 83                    events.push((range, MarkdownEvent::Text(SharedString::new(parsed))));
 84                }
 85            }
 86            pulldown_cmark::Event::Code(_) => {
 87                range.start += 1;
 88                range.end -= 1;
 89                events.push((range, MarkdownEvent::Code))
 90            }
 91            pulldown_cmark::Event::Html(_) => events.push((range, MarkdownEvent::Html)),
 92            pulldown_cmark::Event::InlineHtml(_) => events.push((range, MarkdownEvent::InlineHtml)),
 93            pulldown_cmark::Event::FootnoteReference(_) => {
 94                events.push((range, MarkdownEvent::FootnoteReference))
 95            }
 96            pulldown_cmark::Event::SoftBreak => events.push((range, MarkdownEvent::SoftBreak)),
 97            pulldown_cmark::Event::HardBreak => events.push((range, MarkdownEvent::HardBreak)),
 98            pulldown_cmark::Event::Rule => events.push((range, MarkdownEvent::Rule)),
 99            pulldown_cmark::Event::TaskListMarker(checked) => {
100                events.push((range, MarkdownEvent::TaskListMarker(checked)))
101            }
102            pulldown_cmark::Event::InlineMath(_) | pulldown_cmark::Event::DisplayMath(_) => {}
103        }
104    }
105    events
106}
107
108pub fn parse_links_only(mut text: &str) -> Vec<(Range<usize>, MarkdownEvent)> {
109    let mut events = Vec::new();
110    let mut finder = LinkFinder::new();
111    finder.kinds(&[linkify::LinkKind::Url]);
112    let mut text_range = Range {
113        start: 0,
114        end: text.len(),
115    };
116    for link in finder.links(text) {
117        let link_range = link.start()..link.end();
118
119        if link_range.start > text_range.start {
120            let (head, tail) = text.split_at(link_range.start - text_range.start);
121            events.push((
122                text_range.start..link_range.start,
123                MarkdownEvent::Text(SharedString::new(head)),
124            ));
125            text = tail;
126        }
127
128        let (link_text, tail) = text.split_at(link_range.end - link_range.start);
129        events.push((
130            link_range.clone(),
131            MarkdownEvent::Start(MarkdownTag::Link {
132                link_type: LinkType::Autolink,
133                dest_url: SharedString::from(link.as_str().to_string()),
134                title: SharedString::default(),
135                id: SharedString::default(),
136            }),
137        ));
138        events.push((
139            link_range.clone(),
140            MarkdownEvent::Text(SharedString::new(link_text)),
141        ));
142        events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
143
144        text_range.start = link_range.end;
145        text = tail;
146    }
147
148    if text_range.end > text_range.start {
149        events.push((text_range, MarkdownEvent::Text(SharedString::new(text))));
150    }
151
152    events
153}
154
/// A static-lifetime equivalent of pulldown_cmark::Event so we can cache the
/// parse result for rendering without resorting to unsafe lifetime coercion.
///
/// Unlike `pulldown_cmark::Event`, the code, HTML and footnote-reference variants
/// carry no payload; the parse functions in this file pair every event with the byte
/// range of the source text it covers.
#[derive(Clone, Debug, PartialEq)]
pub enum MarkdownEvent {
    /// Start of a tagged element. Events that are yielded after this event
    /// and before its corresponding `End` event are inside this element.
    /// Start and end events are balanced, with one exception: `parse_markdown`
    /// emits the `Start` of a metadata block but skips its contents and its `End`.
    Start(MarkdownTag),
    /// End of a tagged element.
    End(MarkdownTagEnd),
    /// A text node, holding the text itself.
    Text(SharedString),
    /// An inline code node. The code itself is not stored in the event.
    Code,
    /// An HTML node. The markup itself is not stored in the event.
    Html,
    /// An inline HTML node. The markup itself is not stored in the event.
    InlineHtml,
    /// A reference to a footnote, which may or may not be defined
    /// by an event with a `MarkdownTag::FootnoteDefinition` tag. Definitions and references to
    /// them may occur in any order. The label is not stored in the event.
    FootnoteReference,
    /// A soft line break.
    SoftBreak,
    /// A hard line break.
    HardBreak,
    /// A horizontal ruler.
    Rule,
    /// A task list marker, rendered as a checkbox in HTML. Contains `true` when it is checked.
    TaskListMarker(bool),
}
186
/// Tags for elements that can contain other elements.
///
/// This is the owned counterpart of `pulldown_cmark::Tag`: borrowed strings are
/// replaced by `SharedString` so that values do not borrow from the parsed text.
#[derive(Clone, Debug, PartialEq)]
pub enum MarkdownTag {
    /// A paragraph of text and other inline elements.
    Paragraph,

    /// A heading, with optional identifier, classes and custom attributes.
    /// The identifier is prefixed with `#` and the last one in the attributes
    /// list is chosen, classes are prefixed with `.` and custom attributes
    /// have no prefix and can optionally have a value (`myattr` or `myattr=myvalue`).
    Heading {
        level: HeadingLevel,
        id: Option<SharedString>,
        classes: Vec<SharedString>,
        /// The first item of the tuple is the attr and second one the value.
        attrs: Vec<(SharedString, Option<SharedString>)>,
    },

    /// A block quote. The kind reported by `pulldown_cmark` is not preserved.
    BlockQuote,

    /// A code block.
    CodeBlock(CodeBlockKind),

    /// An HTML block.
    HtmlBlock,

    /// A list. If the list is ordered the field indicates the number of the first item.
    /// Contains only list items.
    List(Option<u64>), // TODO: add delim and tight for ast (not needed for html)

    /// A list item.
    Item,

    /// A footnote definition. The value contained is the footnote's label by which it can
    /// be referred to.
    FootnoteDefinition(SharedString),

    /// A table. Contains a vector describing the text-alignment for each of its columns.
    Table(Vec<Alignment>),

    /// A table header. Contains only `TableCell`s. Note that the table body starts immediately
    /// after the closure of the `TableHead` tag. There is no `TableBody` tag.
    TableHead,

    /// A table row. Used for both header rows and body rows. Contains only `TableCell`s.
    TableRow,
    /// A table cell.
    TableCell,

    // span-level tags
    /// Emphasized text.
    Emphasis,
    /// Strongly emphasized text.
    Strong,
    /// Struck-through text.
    Strikethrough,

    /// A link.
    Link {
        link_type: LinkType,
        dest_url: SharedString,
        title: SharedString,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: SharedString,
    },

    /// An image, described by its link type, destination URL, title and link identifier.
    Image {
        link_type: LinkType,
        dest_url: SharedString,
        title: SharedString,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: SharedString,
    },

    /// A metadata block. `parse_markdown` emits only the start of such a block and
    /// skips everything up to and including its end.
    MetadataBlock(MetadataBlockKind),

    /// A definition list. `parse_markdown` disables the definition-list extension,
    /// so it does not produce this tag or the two related tags below.
    DefinitionList,
    /// The title (term) of an entry in a definition list.
    DefinitionListTitle,
    /// A definition belonging to a definition list title.
    DefinitionListDefinition,
}
266
/// The kind of a code block: the owned counterpart of `pulldown_cmark::CodeBlockKind`.
#[derive(Clone, Debug, PartialEq)]
pub enum CodeBlockKind {
    /// A code block introduced by indentation; it carries no language information.
    Indented,
    /// The value contained in the tag describes the language of the code, which may be empty.
    Fenced(SharedString),
}
273
274impl From<pulldown_cmark::Tag<'_>> for MarkdownTag {
275    fn from(tag: pulldown_cmark::Tag) -> Self {
276        match tag {
277            pulldown_cmark::Tag::Paragraph => MarkdownTag::Paragraph,
278            pulldown_cmark::Tag::Heading {
279                level,
280                id,
281                classes,
282                attrs,
283            } => {
284                let id = id.map(|id| SharedString::from(id.into_string()));
285                let classes = classes
286                    .into_iter()
287                    .map(|c| SharedString::from(c.into_string()))
288                    .collect();
289                let attrs = attrs
290                    .into_iter()
291                    .map(|(key, value)| {
292                        (
293                            SharedString::from(key.into_string()),
294                            value.map(|v| SharedString::from(v.into_string())),
295                        )
296                    })
297                    .collect();
298                MarkdownTag::Heading {
299                    level,
300                    id,
301                    classes,
302                    attrs,
303                }
304            }
305            pulldown_cmark::Tag::BlockQuote(_kind) => MarkdownTag::BlockQuote,
306            pulldown_cmark::Tag::CodeBlock(kind) => match kind {
307                pulldown_cmark::CodeBlockKind::Indented => {
308                    MarkdownTag::CodeBlock(CodeBlockKind::Indented)
309                }
310                pulldown_cmark::CodeBlockKind::Fenced(info) => MarkdownTag::CodeBlock(
311                    CodeBlockKind::Fenced(SharedString::from(info.into_string())),
312                ),
313            },
314            pulldown_cmark::Tag::List(start_number) => MarkdownTag::List(start_number),
315            pulldown_cmark::Tag::Item => MarkdownTag::Item,
316            pulldown_cmark::Tag::FootnoteDefinition(label) => {
317                MarkdownTag::FootnoteDefinition(SharedString::from(label.to_string()))
318            }
319            pulldown_cmark::Tag::Table(alignments) => MarkdownTag::Table(alignments),
320            pulldown_cmark::Tag::TableHead => MarkdownTag::TableHead,
321            pulldown_cmark::Tag::TableRow => MarkdownTag::TableRow,
322            pulldown_cmark::Tag::TableCell => MarkdownTag::TableCell,
323            pulldown_cmark::Tag::Emphasis => MarkdownTag::Emphasis,
324            pulldown_cmark::Tag::Strong => MarkdownTag::Strong,
325            pulldown_cmark::Tag::Strikethrough => MarkdownTag::Strikethrough,
326            pulldown_cmark::Tag::Link {
327                link_type,
328                dest_url,
329                title,
330                id,
331            } => MarkdownTag::Link {
332                link_type,
333                dest_url: SharedString::from(dest_url.into_string()),
334                title: SharedString::from(title.into_string()),
335                id: SharedString::from(id.into_string()),
336            },
337            pulldown_cmark::Tag::Image {
338                link_type,
339                dest_url,
340                title,
341                id,
342            } => MarkdownTag::Image {
343                link_type,
344                dest_url: SharedString::from(dest_url.into_string()),
345                title: SharedString::from(title.into_string()),
346                id: SharedString::from(id.into_string()),
347            },
348            pulldown_cmark::Tag::HtmlBlock => MarkdownTag::HtmlBlock,
349            pulldown_cmark::Tag::MetadataBlock(kind) => MarkdownTag::MetadataBlock(kind),
350            pulldown_cmark::Tag::DefinitionList => MarkdownTag::DefinitionList,
351            pulldown_cmark::Tag::DefinitionListTitle => MarkdownTag::DefinitionListTitle,
352            pulldown_cmark::Tag::DefinitionListDefinition => MarkdownTag::DefinitionListDefinition,
353        }
354    }
355}