parser.rs

  1use gpui::SharedString;
  2use linkify::LinkFinder;
  3pub use pulldown_cmark::TagEnd as MarkdownTagEnd;
  4use pulldown_cmark::{Alignment, HeadingLevel, LinkType, MetadataBlockKind, Options, Parser};
  5use std::ops::Range;
  6
  7pub fn parse_markdown(text: &str) -> Vec<(Range<usize>, MarkdownEvent)> {
  8    let mut events = Vec::new();
  9    let mut within_link = false;
 10    let mut within_metadata = false;
 11    for (pulldown_event, mut range) in Parser::new_ext(text, Options::all()).into_offset_iter() {
 12        if within_metadata {
 13            if let pulldown_cmark::Event::End(pulldown_cmark::TagEnd::MetadataBlock { .. }) =
 14                pulldown_event
 15            {
 16                within_metadata = false;
 17            }
 18            continue;
 19        }
 20        match pulldown_event {
 21            pulldown_cmark::Event::Start(tag) => {
 22                match tag {
 23                    pulldown_cmark::Tag::Link { .. } => within_link = true,
 24                    pulldown_cmark::Tag::MetadataBlock { .. } => within_metadata = true,
 25                    _ => {}
 26                }
 27                events.push((range, MarkdownEvent::Start(tag.into())))
 28            }
 29            pulldown_cmark::Event::End(tag) => {
 30                if let pulldown_cmark::TagEnd::Link = tag {
 31                    within_link = false;
 32                }
 33                events.push((range, MarkdownEvent::End(tag)));
 34            }
 35            pulldown_cmark::Event::Text(_) => {
 36                // Automatically detect links in text if we're not already within a markdown
 37                // link.
 38                if !within_link {
 39                    let mut finder = LinkFinder::new();
 40                    finder.kinds(&[linkify::LinkKind::Url]);
 41                    let text_range = range.clone();
 42                    for link in finder.links(&text[text_range.clone()]) {
 43                        let link_range =
 44                            text_range.start + link.start()..text_range.start + link.end();
 45
 46                        if link_range.start > range.start {
 47                            events.push((range.start..link_range.start, MarkdownEvent::Text));
 48                        }
 49
 50                        events.push((
 51                            link_range.clone(),
 52                            MarkdownEvent::Start(MarkdownTag::Link {
 53                                link_type: LinkType::Autolink,
 54                                dest_url: SharedString::from(link.as_str().to_string()),
 55                                title: SharedString::default(),
 56                                id: SharedString::default(),
 57                            }),
 58                        ));
 59                        events.push((link_range.clone(), MarkdownEvent::Text));
 60                        events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
 61
 62                        range.start = link_range.end;
 63                    }
 64                }
 65
 66                if range.start < range.end {
 67                    events.push((range, MarkdownEvent::Text));
 68                }
 69            }
 70            pulldown_cmark::Event::Code(_) => {
 71                range.start += 1;
 72                range.end -= 1;
 73                events.push((range, MarkdownEvent::Code))
 74            }
 75            pulldown_cmark::Event::Html(_) => events.push((range, MarkdownEvent::Html)),
 76            pulldown_cmark::Event::InlineHtml(_) => events.push((range, MarkdownEvent::InlineHtml)),
 77            pulldown_cmark::Event::FootnoteReference(_) => {
 78                events.push((range, MarkdownEvent::FootnoteReference))
 79            }
 80            pulldown_cmark::Event::SoftBreak => events.push((range, MarkdownEvent::SoftBreak)),
 81            pulldown_cmark::Event::HardBreak => events.push((range, MarkdownEvent::HardBreak)),
 82            pulldown_cmark::Event::Rule => events.push((range, MarkdownEvent::Rule)),
 83            pulldown_cmark::Event::TaskListMarker(checked) => {
 84                events.push((range, MarkdownEvent::TaskListMarker(checked)))
 85            }
 86        }
 87    }
 88    events
 89}
 90
 91pub fn parse_links_only(text: &str) -> Vec<(Range<usize>, MarkdownEvent)> {
 92    let mut events = Vec::new();
 93    let mut finder = LinkFinder::new();
 94    finder.kinds(&[linkify::LinkKind::Url]);
 95    let mut text_range = Range {
 96        start: 0,
 97        end: text.len(),
 98    };
 99    for link in finder.links(&text) {
100        let link_range = link.start()..link.end();
101
102        if link_range.start > text_range.start {
103            events.push((text_range.start..link_range.start, MarkdownEvent::Text));
104        }
105
106        events.push((
107            link_range.clone(),
108            MarkdownEvent::Start(MarkdownTag::Link {
109                link_type: LinkType::Autolink,
110                dest_url: SharedString::from(link.as_str().to_string()),
111                title: SharedString::default(),
112                id: SharedString::default(),
113            }),
114        ));
115        events.push((link_range.clone(), MarkdownEvent::Text));
116        events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
117
118        text_range.start = link_range.end;
119    }
120
121    if text_range.end > text_range.start {
122        events.push((text_range, MarkdownEvent::Text));
123    }
124
125    events
126}
127
128/// A static-lifetime equivalent of pulldown_cmark::Event so we can cache the
129/// parse result for rendering without resorting to unsafe lifetime coercion.
130#[derive(Clone, Debug, PartialEq)]
131pub enum MarkdownEvent {
132    /// Start of a tagged element. Events that are yielded after this event
133    /// and before its corresponding `End` event are inside this element.
134    /// Start and end events are guaranteed to be balanced.
135    Start(MarkdownTag),
136    /// End of a tagged element.
137    End(MarkdownTagEnd),
138    /// A text node.
139    Text,
140    /// An inline code node.
141    Code,
142    /// An HTML node.
143    Html,
144    /// An inline HTML node.
145    InlineHtml,
146    /// A reference to a footnote with given label, which may or may not be defined
147    /// by an event with a `Tag::FootnoteDefinition` tag. Definitions and references to them may
148    /// occur in any order.
149    FootnoteReference,
150    /// A soft line break.
151    SoftBreak,
152    /// A hard line break.
153    HardBreak,
154    /// A horizontal ruler.
155    Rule,
156    /// A task list marker, rendered as a checkbox in HTML. Contains a true when it is checked.
157    TaskListMarker(bool),
158}
159
160/// Tags for elements that can contain other elements.
161#[derive(Clone, Debug, PartialEq)]
162pub enum MarkdownTag {
163    /// A paragraph of text and other inline elements.
164    Paragraph,
165
166    /// A heading, with optional identifier, classes and custom attributes.
167    /// The identifier is prefixed with `#` and the last one in the attributes
168    /// list is chosen, classes are prefixed with `.` and custom attributes
169    /// have no prefix and can optionally have a value (`myattr` o `myattr=myvalue`).
170    Heading {
171        level: HeadingLevel,
172        id: Option<SharedString>,
173        classes: Vec<SharedString>,
174        /// The first item of the tuple is the attr and second one the value.
175        attrs: Vec<(SharedString, Option<SharedString>)>,
176    },
177
178    BlockQuote,
179
180    /// A code block.
181    CodeBlock(CodeBlockKind),
182
183    /// A HTML block.
184    HtmlBlock,
185
186    /// A list. If the list is ordered the field indicates the number of the first item.
187    /// Contains only list items.
188    List(Option<u64>), // TODO: add delim and tight for ast (not needed for html)
189
190    /// A list item.
191    Item,
192
193    /// A footnote definition. The value contained is the footnote's label by which it can
194    /// be referred to.
195    FootnoteDefinition(SharedString),
196
197    /// A table. Contains a vector describing the text-alignment for each of its columns.
198    Table(Vec<Alignment>),
199
200    /// A table header. Contains only `TableCell`s. Note that the table body starts immediately
201    /// after the closure of the `TableHead` tag. There is no `TableBody` tag.
202    TableHead,
203
204    /// A table row. Is used both for header rows as body rows. Contains only `TableCell`s.
205    TableRow,
206    TableCell,
207
208    // span-level tags
209    Emphasis,
210    Strong,
211    Strikethrough,
212
213    /// A link.
214    Link {
215        link_type: LinkType,
216        dest_url: SharedString,
217        title: SharedString,
218        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
219        id: SharedString,
220    },
221
222    /// An image. The first field is the link type, the second the destination URL and the third is a title,
223    /// the fourth is the link identifier.
224    Image {
225        link_type: LinkType,
226        dest_url: SharedString,
227        title: SharedString,
228        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
229        id: SharedString,
230    },
231
232    /// A metadata block.
233    MetadataBlock(MetadataBlockKind),
234}
235
236#[derive(Clone, Debug, PartialEq)]
237pub enum CodeBlockKind {
238    Indented,
239    /// The value contained in the tag describes the language of the code, which may be empty.
240    Fenced(SharedString),
241}
242
243impl From<pulldown_cmark::Tag<'_>> for MarkdownTag {
244    fn from(tag: pulldown_cmark::Tag) -> Self {
245        match tag {
246            pulldown_cmark::Tag::Paragraph => MarkdownTag::Paragraph,
247            pulldown_cmark::Tag::Heading {
248                level,
249                id,
250                classes,
251                attrs,
252            } => {
253                let id = id.map(|id| SharedString::from(id.into_string()));
254                let classes = classes
255                    .into_iter()
256                    .map(|c| SharedString::from(c.into_string()))
257                    .collect();
258                let attrs = attrs
259                    .into_iter()
260                    .map(|(key, value)| {
261                        (
262                            SharedString::from(key.into_string()),
263                            value.map(|v| SharedString::from(v.into_string())),
264                        )
265                    })
266                    .collect();
267                MarkdownTag::Heading {
268                    level,
269                    id,
270                    classes,
271                    attrs,
272                }
273            }
274            pulldown_cmark::Tag::BlockQuote => MarkdownTag::BlockQuote,
275            pulldown_cmark::Tag::CodeBlock(kind) => match kind {
276                pulldown_cmark::CodeBlockKind::Indented => {
277                    MarkdownTag::CodeBlock(CodeBlockKind::Indented)
278                }
279                pulldown_cmark::CodeBlockKind::Fenced(info) => MarkdownTag::CodeBlock(
280                    CodeBlockKind::Fenced(SharedString::from(info.into_string())),
281                ),
282            },
283            pulldown_cmark::Tag::List(start_number) => MarkdownTag::List(start_number),
284            pulldown_cmark::Tag::Item => MarkdownTag::Item,
285            pulldown_cmark::Tag::FootnoteDefinition(label) => {
286                MarkdownTag::FootnoteDefinition(SharedString::from(label.to_string()))
287            }
288            pulldown_cmark::Tag::Table(alignments) => MarkdownTag::Table(alignments),
289            pulldown_cmark::Tag::TableHead => MarkdownTag::TableHead,
290            pulldown_cmark::Tag::TableRow => MarkdownTag::TableRow,
291            pulldown_cmark::Tag::TableCell => MarkdownTag::TableCell,
292            pulldown_cmark::Tag::Emphasis => MarkdownTag::Emphasis,
293            pulldown_cmark::Tag::Strong => MarkdownTag::Strong,
294            pulldown_cmark::Tag::Strikethrough => MarkdownTag::Strikethrough,
295            pulldown_cmark::Tag::Link {
296                link_type,
297                dest_url,
298                title,
299                id,
300            } => MarkdownTag::Link {
301                link_type,
302                dest_url: SharedString::from(dest_url.into_string()),
303                title: SharedString::from(title.into_string()),
304                id: SharedString::from(id.into_string()),
305            },
306            pulldown_cmark::Tag::Image {
307                link_type,
308                dest_url,
309                title,
310                id,
311            } => MarkdownTag::Image {
312                link_type,
313                dest_url: SharedString::from(dest_url.into_string()),
314                title: SharedString::from(title.into_string()),
315                id: SharedString::from(id.into_string()),
316            },
317            pulldown_cmark::Tag::HtmlBlock => MarkdownTag::HtmlBlock,
318            pulldown_cmark::Tag::MetadataBlock(kind) => MarkdownTag::MetadataBlock(kind),
319        }
320    }
321}