parser.rs

  1use gpui::SharedString;
  2use linkify::LinkFinder;
  3pub use pulldown_cmark::TagEnd as MarkdownTagEnd;
  4use pulldown_cmark::{Alignment, HeadingLevel, LinkType, MetadataBlockKind, Options, Parser};
  5use std::{collections::HashSet, ops::Range};
  6
  7pub fn parse_markdown(text: &str) -> (Vec<(Range<usize>, MarkdownEvent)>, HashSet<SharedString>) {
  8    let mut options = Options::all();
  9    options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
 10    options.remove(pulldown_cmark::Options::ENABLE_YAML_STYLE_METADATA_BLOCKS);
 11    options.remove(pulldown_cmark::Options::ENABLE_MATH);
 12
 13    let mut events = Vec::new();
 14    let mut languages = HashSet::new();
 15    let mut within_link = false;
 16    let mut within_metadata = false;
 17    for (pulldown_event, mut range) in Parser::new_ext(text, options).into_offset_iter() {
 18        if within_metadata {
 19            if let pulldown_cmark::Event::End(pulldown_cmark::TagEnd::MetadataBlock { .. }) =
 20                pulldown_event
 21            {
 22                within_metadata = false;
 23            }
 24            continue;
 25        }
 26        match pulldown_event {
 27            pulldown_cmark::Event::Start(tag) => {
 28                match tag {
 29                    pulldown_cmark::Tag::Link { .. } => within_link = true,
 30                    pulldown_cmark::Tag::MetadataBlock { .. } => within_metadata = true,
 31                    pulldown_cmark::Tag::CodeBlock(pulldown_cmark::CodeBlockKind::Fenced(
 32                        ref language,
 33                    )) => {
 34                        languages.insert(SharedString::from(language.to_string()));
 35                    }
 36                    _ => {}
 37                }
 38                events.push((range, MarkdownEvent::Start(tag.into())))
 39            }
 40            pulldown_cmark::Event::End(tag) => {
 41                if let pulldown_cmark::TagEnd::Link = tag {
 42                    within_link = false;
 43                }
 44                events.push((range, MarkdownEvent::End(tag)));
 45            }
 46            pulldown_cmark::Event::Text(parsed) => {
 47                // Automatically detect links in text if we're not already within a markdown
 48                // link.
 49                let mut parsed = parsed.as_ref();
 50                if !within_link {
 51                    let mut finder = LinkFinder::new();
 52                    finder.kinds(&[linkify::LinkKind::Url]);
 53                    let text_range = range.clone();
 54                    for link in finder.links(&text[text_range.clone()]) {
 55                        let link_range =
 56                            text_range.start + link.start()..text_range.start + link.end();
 57
 58                        if link_range.start > range.start {
 59                            let (text, tail) = parsed.split_at(link_range.start - range.start);
 60                            events.push((
 61                                range.start..link_range.start,
 62                                MarkdownEvent::Text(SharedString::new(text)),
 63                            ));
 64                            parsed = tail;
 65                        }
 66
 67                        events.push((
 68                            link_range.clone(),
 69                            MarkdownEvent::Start(MarkdownTag::Link {
 70                                link_type: LinkType::Autolink,
 71                                dest_url: SharedString::from(link.as_str().to_string()),
 72                                title: SharedString::default(),
 73                                id: SharedString::default(),
 74                            }),
 75                        ));
 76
 77                        let (link_text, tail) = parsed.split_at(link_range.end - link_range.start);
 78                        events.push((
 79                            link_range.clone(),
 80                            MarkdownEvent::Text(SharedString::new(link_text)),
 81                        ));
 82                        events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
 83
 84                        range.start = link_range.end;
 85                        parsed = tail;
 86                    }
 87                }
 88                if range.start < range.end {
 89                    events.push((range, MarkdownEvent::Text(SharedString::new(parsed))));
 90                }
 91            }
 92            pulldown_cmark::Event::Code(_) => {
 93                range.start += 1;
 94                range.end -= 1;
 95                events.push((range, MarkdownEvent::Code))
 96            }
 97            pulldown_cmark::Event::Html(_) => events.push((range, MarkdownEvent::Html)),
 98            pulldown_cmark::Event::InlineHtml(_) => events.push((range, MarkdownEvent::InlineHtml)),
 99            pulldown_cmark::Event::FootnoteReference(_) => {
100                events.push((range, MarkdownEvent::FootnoteReference))
101            }
102            pulldown_cmark::Event::SoftBreak => events.push((range, MarkdownEvent::SoftBreak)),
103            pulldown_cmark::Event::HardBreak => events.push((range, MarkdownEvent::HardBreak)),
104            pulldown_cmark::Event::Rule => events.push((range, MarkdownEvent::Rule)),
105            pulldown_cmark::Event::TaskListMarker(checked) => {
106                events.push((range, MarkdownEvent::TaskListMarker(checked)))
107            }
108            pulldown_cmark::Event::InlineMath(_) | pulldown_cmark::Event::DisplayMath(_) => {}
109        }
110    }
111    (events, languages)
112}
113
114pub fn parse_links_only(mut text: &str) -> Vec<(Range<usize>, MarkdownEvent)> {
115    let mut events = Vec::new();
116    let mut finder = LinkFinder::new();
117    finder.kinds(&[linkify::LinkKind::Url]);
118    let mut text_range = Range {
119        start: 0,
120        end: text.len(),
121    };
122    for link in finder.links(text) {
123        let link_range = link.start()..link.end();
124
125        if link_range.start > text_range.start {
126            let (head, tail) = text.split_at(link_range.start - text_range.start);
127            events.push((
128                text_range.start..link_range.start,
129                MarkdownEvent::Text(SharedString::new(head)),
130            ));
131            text = tail;
132        }
133
134        let (link_text, tail) = text.split_at(link_range.end - link_range.start);
135        events.push((
136            link_range.clone(),
137            MarkdownEvent::Start(MarkdownTag::Link {
138                link_type: LinkType::Autolink,
139                dest_url: SharedString::from(link.as_str().to_string()),
140                title: SharedString::default(),
141                id: SharedString::default(),
142            }),
143        ));
144        events.push((
145            link_range.clone(),
146            MarkdownEvent::Text(SharedString::new(link_text)),
147        ));
148        events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
149
150        text_range.start = link_range.end;
151        text = tail;
152    }
153
154    if text_range.end > text_range.start {
155        events.push((text_range, MarkdownEvent::Text(SharedString::new(text))));
156    }
157
158    events
159}
160
161/// A static-lifetime equivalent of pulldown_cmark::Event so we can cache the
162/// parse result for rendering without resorting to unsafe lifetime coercion.
163#[derive(Clone, Debug, PartialEq)]
164pub enum MarkdownEvent {
165    /// Start of a tagged element. Events that are yielded after this event
166    /// and before its corresponding `End` event are inside this element.
167    /// Start and end events are guaranteed to be balanced.
168    Start(MarkdownTag),
169    /// End of a tagged element.
170    End(MarkdownTagEnd),
171    /// A text node.
172    Text(SharedString),
173    /// An inline code node.
174    Code,
175    /// An HTML node.
176    Html,
177    /// An inline HTML node.
178    InlineHtml,
179    /// A reference to a footnote with given label, which may or may not be defined
180    /// by an event with a `Tag::FootnoteDefinition` tag. Definitions and references to them may
181    /// occur in any order.
182    FootnoteReference,
183    /// A soft line break.
184    SoftBreak,
185    /// A hard line break.
186    HardBreak,
187    /// A horizontal ruler.
188    Rule,
189    /// A task list marker, rendered as a checkbox in HTML. Contains a true when it is checked.
190    TaskListMarker(bool),
191}
192
193/// Tags for elements that can contain other elements.
194#[derive(Clone, Debug, PartialEq)]
195pub enum MarkdownTag {
196    /// A paragraph of text and other inline elements.
197    Paragraph,
198
199    /// A heading, with optional identifier, classes and custom attributes.
200    /// The identifier is prefixed with `#` and the last one in the attributes
201    /// list is chosen, classes are prefixed with `.` and custom attributes
202    /// have no prefix and can optionally have a value (`myattr` o `myattr=myvalue`).
203    Heading {
204        level: HeadingLevel,
205        id: Option<SharedString>,
206        classes: Vec<SharedString>,
207        /// The first item of the tuple is the attr and second one the value.
208        attrs: Vec<(SharedString, Option<SharedString>)>,
209    },
210
211    BlockQuote,
212
213    /// A code block.
214    CodeBlock(CodeBlockKind),
215
216    /// A HTML block.
217    HtmlBlock,
218
219    /// A list. If the list is ordered the field indicates the number of the first item.
220    /// Contains only list items.
221    List(Option<u64>), // TODO: add delim and tight for ast (not needed for html)
222
223    /// A list item.
224    Item,
225
226    /// A footnote definition. The value contained is the footnote's label by which it can
227    /// be referred to.
228    FootnoteDefinition(SharedString),
229
230    /// A table. Contains a vector describing the text-alignment for each of its columns.
231    Table(Vec<Alignment>),
232
233    /// A table header. Contains only `TableCell`s. Note that the table body starts immediately
234    /// after the closure of the `TableHead` tag. There is no `TableBody` tag.
235    TableHead,
236
237    /// A table row. Is used both for header rows as body rows. Contains only `TableCell`s.
238    TableRow,
239    TableCell,
240
241    // span-level tags
242    Emphasis,
243    Strong,
244    Strikethrough,
245
246    /// A link.
247    Link {
248        link_type: LinkType,
249        dest_url: SharedString,
250        title: SharedString,
251        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
252        id: SharedString,
253    },
254
255    /// An image. The first field is the link type, the second the destination URL and the third is a title,
256    /// the fourth is the link identifier.
257    Image {
258        link_type: LinkType,
259        dest_url: SharedString,
260        title: SharedString,
261        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
262        id: SharedString,
263    },
264
265    /// A metadata block.
266    MetadataBlock(MetadataBlockKind),
267
268    DefinitionList,
269    DefinitionListTitle,
270    DefinitionListDefinition,
271}
272
273#[derive(Clone, Debug, PartialEq)]
274pub enum CodeBlockKind {
275    Indented,
276    /// The value contained in the tag describes the language of the code, which may be empty.
277    Fenced(SharedString),
278}
279
280impl From<pulldown_cmark::Tag<'_>> for MarkdownTag {
281    fn from(tag: pulldown_cmark::Tag) -> Self {
282        match tag {
283            pulldown_cmark::Tag::Paragraph => MarkdownTag::Paragraph,
284            pulldown_cmark::Tag::Heading {
285                level,
286                id,
287                classes,
288                attrs,
289            } => {
290                let id = id.map(|id| SharedString::from(id.into_string()));
291                let classes = classes
292                    .into_iter()
293                    .map(|c| SharedString::from(c.into_string()))
294                    .collect();
295                let attrs = attrs
296                    .into_iter()
297                    .map(|(key, value)| {
298                        (
299                            SharedString::from(key.into_string()),
300                            value.map(|v| SharedString::from(v.into_string())),
301                        )
302                    })
303                    .collect();
304                MarkdownTag::Heading {
305                    level,
306                    id,
307                    classes,
308                    attrs,
309                }
310            }
311            pulldown_cmark::Tag::BlockQuote(_kind) => MarkdownTag::BlockQuote,
312            pulldown_cmark::Tag::CodeBlock(kind) => match kind {
313                pulldown_cmark::CodeBlockKind::Indented => {
314                    MarkdownTag::CodeBlock(CodeBlockKind::Indented)
315                }
316                pulldown_cmark::CodeBlockKind::Fenced(info) => MarkdownTag::CodeBlock(
317                    CodeBlockKind::Fenced(SharedString::from(info.into_string())),
318                ),
319            },
320            pulldown_cmark::Tag::List(start_number) => MarkdownTag::List(start_number),
321            pulldown_cmark::Tag::Item => MarkdownTag::Item,
322            pulldown_cmark::Tag::FootnoteDefinition(label) => {
323                MarkdownTag::FootnoteDefinition(SharedString::from(label.to_string()))
324            }
325            pulldown_cmark::Tag::Table(alignments) => MarkdownTag::Table(alignments),
326            pulldown_cmark::Tag::TableHead => MarkdownTag::TableHead,
327            pulldown_cmark::Tag::TableRow => MarkdownTag::TableRow,
328            pulldown_cmark::Tag::TableCell => MarkdownTag::TableCell,
329            pulldown_cmark::Tag::Emphasis => MarkdownTag::Emphasis,
330            pulldown_cmark::Tag::Strong => MarkdownTag::Strong,
331            pulldown_cmark::Tag::Strikethrough => MarkdownTag::Strikethrough,
332            pulldown_cmark::Tag::Link {
333                link_type,
334                dest_url,
335                title,
336                id,
337            } => MarkdownTag::Link {
338                link_type,
339                dest_url: SharedString::from(dest_url.into_string()),
340                title: SharedString::from(title.into_string()),
341                id: SharedString::from(id.into_string()),
342            },
343            pulldown_cmark::Tag::Image {
344                link_type,
345                dest_url,
346                title,
347                id,
348            } => MarkdownTag::Image {
349                link_type,
350                dest_url: SharedString::from(dest_url.into_string()),
351                title: SharedString::from(title.into_string()),
352                id: SharedString::from(id.into_string()),
353            },
354            pulldown_cmark::Tag::HtmlBlock => MarkdownTag::HtmlBlock,
355            pulldown_cmark::Tag::MetadataBlock(kind) => MarkdownTag::MetadataBlock(kind),
356            pulldown_cmark::Tag::DefinitionList => MarkdownTag::DefinitionList,
357            pulldown_cmark::Tag::DefinitionListTitle => MarkdownTag::DefinitionListTitle,
358            pulldown_cmark::Tag::DefinitionListDefinition => MarkdownTag::DefinitionListDefinition,
359        }
360    }
361}