parser.rs

  1use gpui::SharedString;
  2use linkify::LinkFinder;
  3pub use pulldown_cmark::TagEnd as MarkdownTagEnd;
  4use pulldown_cmark::{Alignment, HeadingLevel, LinkType, MetadataBlockKind, Options, Parser};
  5use std::ops::Range;
  6
  7pub fn parse_markdown(text: &str) -> Vec<(Range<usize>, MarkdownEvent)> {
  8    let mut options = Options::all();
  9    options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
 10    options.remove(pulldown_cmark::Options::ENABLE_YAML_STYLE_METADATA_BLOCKS);
 11
 12    let mut events = Vec::new();
 13    let mut within_link = false;
 14    let mut within_metadata = false;
 15    for (pulldown_event, mut range) in Parser::new_ext(text, options).into_offset_iter() {
 16        if within_metadata {
 17            if let pulldown_cmark::Event::End(pulldown_cmark::TagEnd::MetadataBlock { .. }) =
 18                pulldown_event
 19            {
 20                within_metadata = false;
 21            }
 22            continue;
 23        }
 24        match pulldown_event {
 25            pulldown_cmark::Event::Start(tag) => {
 26                match tag {
 27                    pulldown_cmark::Tag::Link { .. } => within_link = true,
 28                    pulldown_cmark::Tag::MetadataBlock { .. } => within_metadata = true,
 29                    _ => {}
 30                }
 31                events.push((range, MarkdownEvent::Start(tag.into())))
 32            }
 33            pulldown_cmark::Event::End(tag) => {
 34                if let pulldown_cmark::TagEnd::Link = tag {
 35                    within_link = false;
 36                }
 37                events.push((range, MarkdownEvent::End(tag)));
 38            }
 39            pulldown_cmark::Event::Text(_) => {
 40                // Automatically detect links in text if we're not already within a markdown
 41                // link.
 42                if !within_link {
 43                    let mut finder = LinkFinder::new();
 44                    finder.kinds(&[linkify::LinkKind::Url]);
 45                    let text_range = range.clone();
 46                    for link in finder.links(&text[text_range.clone()]) {
 47                        let link_range =
 48                            text_range.start + link.start()..text_range.start + link.end();
 49
 50                        if link_range.start > range.start {
 51                            events.push((range.start..link_range.start, MarkdownEvent::Text));
 52                        }
 53
 54                        events.push((
 55                            link_range.clone(),
 56                            MarkdownEvent::Start(MarkdownTag::Link {
 57                                link_type: LinkType::Autolink,
 58                                dest_url: SharedString::from(link.as_str().to_string()),
 59                                title: SharedString::default(),
 60                                id: SharedString::default(),
 61                            }),
 62                        ));
 63                        events.push((link_range.clone(), MarkdownEvent::Text));
 64                        events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
 65
 66                        range.start = link_range.end;
 67                    }
 68                }
 69
 70                if range.start < range.end {
 71                    events.push((range, MarkdownEvent::Text));
 72                }
 73            }
 74            pulldown_cmark::Event::Code(_) => {
 75                range.start += 1;
 76                range.end -= 1;
 77                events.push((range, MarkdownEvent::Code))
 78            }
 79            pulldown_cmark::Event::Html(_) => events.push((range, MarkdownEvent::Html)),
 80            pulldown_cmark::Event::InlineHtml(_) => events.push((range, MarkdownEvent::InlineHtml)),
 81            pulldown_cmark::Event::FootnoteReference(_) => {
 82                events.push((range, MarkdownEvent::FootnoteReference))
 83            }
 84            pulldown_cmark::Event::SoftBreak => events.push((range, MarkdownEvent::SoftBreak)),
 85            pulldown_cmark::Event::HardBreak => events.push((range, MarkdownEvent::HardBreak)),
 86            pulldown_cmark::Event::Rule => events.push((range, MarkdownEvent::Rule)),
 87            pulldown_cmark::Event::TaskListMarker(checked) => {
 88                events.push((range, MarkdownEvent::TaskListMarker(checked)))
 89            }
 90            pulldown_cmark::Event::InlineMath(_) | pulldown_cmark::Event::DisplayMath(_) => {}
 91        }
 92    }
 93    events
 94}
 95
 96pub fn parse_links_only(text: &str) -> Vec<(Range<usize>, MarkdownEvent)> {
 97    let mut events = Vec::new();
 98    let mut finder = LinkFinder::new();
 99    finder.kinds(&[linkify::LinkKind::Url]);
100    let mut text_range = Range {
101        start: 0,
102        end: text.len(),
103    };
104    for link in finder.links(text) {
105        let link_range = link.start()..link.end();
106
107        if link_range.start > text_range.start {
108            events.push((text_range.start..link_range.start, MarkdownEvent::Text));
109        }
110
111        events.push((
112            link_range.clone(),
113            MarkdownEvent::Start(MarkdownTag::Link {
114                link_type: LinkType::Autolink,
115                dest_url: SharedString::from(link.as_str().to_string()),
116                title: SharedString::default(),
117                id: SharedString::default(),
118            }),
119        ));
120        events.push((link_range.clone(), MarkdownEvent::Text));
121        events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
122
123        text_range.start = link_range.end;
124    }
125
126    if text_range.end > text_range.start {
127        events.push((text_range, MarkdownEvent::Text));
128    }
129
130    events
131}
132
/// A static-lifetime equivalent of `pulldown_cmark::Event` so we can cache the
/// parse result for rendering without resorting to unsafe lifetime coercion.
///
/// Unlike `pulldown_cmark::Event`, the leaf variants (`Text`, `Code`, `Html`,
/// `InlineHtml`, `FootnoteReference`) carry no string payload. The parse
/// functions in this file return every event paired with a `Range<usize>` of
/// byte offsets into the parsed source text.
#[derive(Clone, Debug, PartialEq)]
pub enum MarkdownEvent {
    /// Start of a tagged element. Events that are yielded after this event
    /// and before its corresponding `End` event are inside this element.
    ///
    /// Start and end events are normally balanced. The exception visible in
    /// this file is `parse_markdown`, which emits the `Start` of a metadata
    /// block but drops the block's contents and its `End` event.
    Start(MarkdownTag),
    /// End of a tagged element.
    End(MarkdownTagEnd),
    /// A text node.
    Text,
    /// An inline code node.
    Code,
    /// An HTML node.
    Html,
    /// An inline HTML node.
    InlineHtml,
    /// A reference to a footnote, which may or may not be defined by an event
    /// with a `MarkdownTag::FootnoteDefinition` tag. Definitions and references
    /// to them may occur in any order. The label itself is not stored in the
    /// event.
    FootnoteReference,
    /// A soft line break.
    SoftBreak,
    /// A hard line break.
    HardBreak,
    /// A horizontal ruler.
    Rule,
    /// A task list marker, rendered as a checkbox in HTML. Contains `true` when it is checked.
    TaskListMarker(bool),
}
164
/// Tags for elements that can contain other elements.
///
/// This is the owned counterpart of `pulldown_cmark::Tag`: every borrowed
/// string is stored as a `SharedString` instead.
#[derive(Clone, Debug, PartialEq)]
pub enum MarkdownTag {
    /// A paragraph of text and other inline elements.
    Paragraph,

    /// A heading, with optional identifier, classes and custom attributes.
    /// The identifier is prefixed with `#` and the last one in the attributes
    /// list is chosen, classes are prefixed with `.` and custom attributes
    /// have no prefix and can optionally have a value (`myattr` or `myattr=myvalue`).
    Heading {
        level: HeadingLevel,
        id: Option<SharedString>,
        classes: Vec<SharedString>,
        /// The first item of the tuple is the attr and second one the value.
        attrs: Vec<(SharedString, Option<SharedString>)>,
    },

    /// A block quote. The block quote kind reported by `pulldown_cmark` is not
    /// preserved.
    BlockQuote,

    /// A code block.
    CodeBlock(CodeBlockKind),

    /// A HTML block.
    HtmlBlock,

    /// A list. If the list is ordered the field indicates the number of the first item.
    /// Contains only list items.
    List(Option<u64>), // TODO: add delim and tight for ast (not needed for html)

    /// A list item.
    Item,

    /// A footnote definition. The value contained is the footnote's label by which it can
    /// be referred to.
    FootnoteDefinition(SharedString),

    /// A table. Contains a vector describing the text-alignment for each of its columns.
    Table(Vec<Alignment>),

    /// A table header. Contains only `TableCell`s. Note that the table body starts immediately
    /// after the closure of the `TableHead` tag. There is no `TableBody` tag.
    TableHead,

    /// A table row. Is used both for header rows as body rows. Contains only `TableCell`s.
    TableRow,
    /// A single cell of a table row or table header.
    TableCell,

    // span-level tags
    /// Emphasized text.
    Emphasis,
    /// Strongly emphasized text.
    Strong,
    /// Struck-through text.
    Strikethrough,

    /// A link.
    Link {
        link_type: LinkType,
        dest_url: SharedString,
        title: SharedString,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: SharedString,
    },

    /// An image. The first field is the link type, the second the destination URL and the third is a title,
    /// the fourth is the link identifier.
    Image {
        link_type: LinkType,
        dest_url: SharedString,
        title: SharedString,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: SharedString,
    },

    /// A metadata block.
    MetadataBlock(MetadataBlockKind),

    /// A definition list.
    DefinitionList,
    /// The title (term) of an entry in a definition list.
    DefinitionListTitle,
    /// The definition belonging to an entry in a definition list.
    DefinitionListDefinition,
}
244
/// The kind of a code block, with the info string of a fenced block stored as
/// an owned `SharedString`.
#[derive(Clone, Debug, PartialEq)]
pub enum CodeBlockKind {
    /// An indented code block; it carries no language information.
    Indented,
    /// The value contained in the tag describes the language of the code, which may be empty.
    Fenced(SharedString),
}
251
252impl From<pulldown_cmark::Tag<'_>> for MarkdownTag {
253    fn from(tag: pulldown_cmark::Tag) -> Self {
254        match tag {
255            pulldown_cmark::Tag::Paragraph => MarkdownTag::Paragraph,
256            pulldown_cmark::Tag::Heading {
257                level,
258                id,
259                classes,
260                attrs,
261            } => {
262                let id = id.map(|id| SharedString::from(id.into_string()));
263                let classes = classes
264                    .into_iter()
265                    .map(|c| SharedString::from(c.into_string()))
266                    .collect();
267                let attrs = attrs
268                    .into_iter()
269                    .map(|(key, value)| {
270                        (
271                            SharedString::from(key.into_string()),
272                            value.map(|v| SharedString::from(v.into_string())),
273                        )
274                    })
275                    .collect();
276                MarkdownTag::Heading {
277                    level,
278                    id,
279                    classes,
280                    attrs,
281                }
282            }
283            pulldown_cmark::Tag::BlockQuote(_kind) => MarkdownTag::BlockQuote,
284            pulldown_cmark::Tag::CodeBlock(kind) => match kind {
285                pulldown_cmark::CodeBlockKind::Indented => {
286                    MarkdownTag::CodeBlock(CodeBlockKind::Indented)
287                }
288                pulldown_cmark::CodeBlockKind::Fenced(info) => MarkdownTag::CodeBlock(
289                    CodeBlockKind::Fenced(SharedString::from(info.into_string())),
290                ),
291            },
292            pulldown_cmark::Tag::List(start_number) => MarkdownTag::List(start_number),
293            pulldown_cmark::Tag::Item => MarkdownTag::Item,
294            pulldown_cmark::Tag::FootnoteDefinition(label) => {
295                MarkdownTag::FootnoteDefinition(SharedString::from(label.to_string()))
296            }
297            pulldown_cmark::Tag::Table(alignments) => MarkdownTag::Table(alignments),
298            pulldown_cmark::Tag::TableHead => MarkdownTag::TableHead,
299            pulldown_cmark::Tag::TableRow => MarkdownTag::TableRow,
300            pulldown_cmark::Tag::TableCell => MarkdownTag::TableCell,
301            pulldown_cmark::Tag::Emphasis => MarkdownTag::Emphasis,
302            pulldown_cmark::Tag::Strong => MarkdownTag::Strong,
303            pulldown_cmark::Tag::Strikethrough => MarkdownTag::Strikethrough,
304            pulldown_cmark::Tag::Link {
305                link_type,
306                dest_url,
307                title,
308                id,
309            } => MarkdownTag::Link {
310                link_type,
311                dest_url: SharedString::from(dest_url.into_string()),
312                title: SharedString::from(title.into_string()),
313                id: SharedString::from(id.into_string()),
314            },
315            pulldown_cmark::Tag::Image {
316                link_type,
317                dest_url,
318                title,
319                id,
320            } => MarkdownTag::Image {
321                link_type,
322                dest_url: SharedString::from(dest_url.into_string()),
323                title: SharedString::from(title.into_string()),
324                id: SharedString::from(id.into_string()),
325            },
326            pulldown_cmark::Tag::HtmlBlock => MarkdownTag::HtmlBlock,
327            pulldown_cmark::Tag::MetadataBlock(kind) => MarkdownTag::MetadataBlock(kind),
328            pulldown_cmark::Tag::DefinitionList => MarkdownTag::DefinitionList,
329            pulldown_cmark::Tag::DefinitionListTitle => MarkdownTag::DefinitionListTitle,
330            pulldown_cmark::Tag::DefinitionListDefinition => MarkdownTag::DefinitionListDefinition,
331        }
332    }
333}