parser.rs

  1use gpui::SharedString;
  2use linkify::LinkFinder;
  3pub use pulldown_cmark::TagEnd as MarkdownTagEnd;
  4use pulldown_cmark::{Alignment, HeadingLevel, LinkType, MetadataBlockKind, Options, Parser};
  5use std::ops::Range;
  6
  7pub fn parse_markdown(text: &str) -> Vec<(Range<usize>, MarkdownEvent)> {
  8    let mut events = Vec::new();
  9    let mut within_link = false;
 10    let mut within_metadata = false;
 11    for (pulldown_event, mut range) in Parser::new_ext(text, Options::all()).into_offset_iter() {
 12        if within_metadata {
 13            if let pulldown_cmark::Event::End(pulldown_cmark::TagEnd::MetadataBlock { .. }) =
 14                pulldown_event
 15            {
 16                within_metadata = false;
 17            }
 18            continue;
 19        }
 20        match pulldown_event {
 21            pulldown_cmark::Event::Start(tag) => {
 22                match tag {
 23                    pulldown_cmark::Tag::Link { .. } => within_link = true,
 24                    pulldown_cmark::Tag::MetadataBlock { .. } => within_metadata = true,
 25                    _ => {}
 26                }
 27                events.push((range, MarkdownEvent::Start(tag.into())))
 28            }
 29            pulldown_cmark::Event::End(tag) => {
 30                if let pulldown_cmark::TagEnd::Link = tag {
 31                    within_link = false;
 32                }
 33                events.push((range, MarkdownEvent::End(tag)));
 34            }
 35            pulldown_cmark::Event::Text(_) => {
 36                // Automatically detect links in text if we're not already within a markdown
 37                // link.
 38                if !within_link {
 39                    let mut finder = LinkFinder::new();
 40                    finder.kinds(&[linkify::LinkKind::Url]);
 41                    let text_range = range.clone();
 42                    for link in finder.links(&text[text_range.clone()]) {
 43                        let link_range =
 44                            text_range.start + link.start()..text_range.start + link.end();
 45
 46                        if link_range.start > range.start {
 47                            events.push((range.start..link_range.start, MarkdownEvent::Text));
 48                        }
 49
 50                        events.push((
 51                            link_range.clone(),
 52                            MarkdownEvent::Start(MarkdownTag::Link {
 53                                link_type: LinkType::Autolink,
 54                                dest_url: SharedString::from(link.as_str().to_string()),
 55                                title: SharedString::default(),
 56                                id: SharedString::default(),
 57                            }),
 58                        ));
 59                        events.push((link_range.clone(), MarkdownEvent::Text));
 60                        events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
 61
 62                        range.start = link_range.end;
 63                    }
 64                }
 65
 66                if range.start < range.end {
 67                    events.push((range, MarkdownEvent::Text));
 68                }
 69            }
 70            pulldown_cmark::Event::Code(_) => {
 71                range.start += 1;
 72                range.end -= 1;
 73                events.push((range, MarkdownEvent::Code))
 74            }
 75            pulldown_cmark::Event::Html(_) => events.push((range, MarkdownEvent::Html)),
 76            pulldown_cmark::Event::InlineHtml(_) => events.push((range, MarkdownEvent::InlineHtml)),
 77            pulldown_cmark::Event::FootnoteReference(_) => {
 78                events.push((range, MarkdownEvent::FootnoteReference))
 79            }
 80            pulldown_cmark::Event::SoftBreak => events.push((range, MarkdownEvent::SoftBreak)),
 81            pulldown_cmark::Event::HardBreak => events.push((range, MarkdownEvent::HardBreak)),
 82            pulldown_cmark::Event::Rule => events.push((range, MarkdownEvent::Rule)),
 83            pulldown_cmark::Event::TaskListMarker(checked) => {
 84                events.push((range, MarkdownEvent::TaskListMarker(checked)))
 85            }
 86            pulldown_cmark::Event::InlineMath(_) | pulldown_cmark::Event::DisplayMath(_) => {}
 87        }
 88    }
 89    events
 90}
 91
 92pub fn parse_links_only(text: &str) -> Vec<(Range<usize>, MarkdownEvent)> {
 93    let mut events = Vec::new();
 94    let mut finder = LinkFinder::new();
 95    finder.kinds(&[linkify::LinkKind::Url]);
 96    let mut text_range = Range {
 97        start: 0,
 98        end: text.len(),
 99    };
100    for link in finder.links(&text) {
101        let link_range = link.start()..link.end();
102
103        if link_range.start > text_range.start {
104            events.push((text_range.start..link_range.start, MarkdownEvent::Text));
105        }
106
107        events.push((
108            link_range.clone(),
109            MarkdownEvent::Start(MarkdownTag::Link {
110                link_type: LinkType::Autolink,
111                dest_url: SharedString::from(link.as_str().to_string()),
112                title: SharedString::default(),
113                id: SharedString::default(),
114            }),
115        ));
116        events.push((link_range.clone(), MarkdownEvent::Text));
117        events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
118
119        text_range.start = link_range.end;
120    }
121
122    if text_range.end > text_range.start {
123        events.push((text_range, MarkdownEvent::Text));
124    }
125
126    events
127}
128
129/// A static-lifetime equivalent of pulldown_cmark::Event so we can cache the
130/// parse result for rendering without resorting to unsafe lifetime coercion.
131#[derive(Clone, Debug, PartialEq)]
132pub enum MarkdownEvent {
133    /// Start of a tagged element. Events that are yielded after this event
134    /// and before its corresponding `End` event are inside this element.
135    /// Start and end events are guaranteed to be balanced.
136    Start(MarkdownTag),
137    /// End of a tagged element.
138    End(MarkdownTagEnd),
139    /// A text node.
140    Text,
141    /// An inline code node.
142    Code,
143    /// An HTML node.
144    Html,
145    /// An inline HTML node.
146    InlineHtml,
147    /// A reference to a footnote with given label, which may or may not be defined
148    /// by an event with a `Tag::FootnoteDefinition` tag. Definitions and references to them may
149    /// occur in any order.
150    FootnoteReference,
151    /// A soft line break.
152    SoftBreak,
153    /// A hard line break.
154    HardBreak,
155    /// A horizontal ruler.
156    Rule,
157    /// A task list marker, rendered as a checkbox in HTML. Contains a true when it is checked.
158    TaskListMarker(bool),
159}
160
161/// Tags for elements that can contain other elements.
162#[derive(Clone, Debug, PartialEq)]
163pub enum MarkdownTag {
164    /// A paragraph of text and other inline elements.
165    Paragraph,
166
167    /// A heading, with optional identifier, classes and custom attributes.
168    /// The identifier is prefixed with `#` and the last one in the attributes
169    /// list is chosen, classes are prefixed with `.` and custom attributes
170    /// have no prefix and can optionally have a value (`myattr` o `myattr=myvalue`).
171    Heading {
172        level: HeadingLevel,
173        id: Option<SharedString>,
174        classes: Vec<SharedString>,
175        /// The first item of the tuple is the attr and second one the value.
176        attrs: Vec<(SharedString, Option<SharedString>)>,
177    },
178
179    BlockQuote,
180
181    /// A code block.
182    CodeBlock(CodeBlockKind),
183
184    /// A HTML block.
185    HtmlBlock,
186
187    /// A list. If the list is ordered the field indicates the number of the first item.
188    /// Contains only list items.
189    List(Option<u64>), // TODO: add delim and tight for ast (not needed for html)
190
191    /// A list item.
192    Item,
193
194    /// A footnote definition. The value contained is the footnote's label by which it can
195    /// be referred to.
196    FootnoteDefinition(SharedString),
197
198    /// A table. Contains a vector describing the text-alignment for each of its columns.
199    Table(Vec<Alignment>),
200
201    /// A table header. Contains only `TableCell`s. Note that the table body starts immediately
202    /// after the closure of the `TableHead` tag. There is no `TableBody` tag.
203    TableHead,
204
205    /// A table row. Is used both for header rows as body rows. Contains only `TableCell`s.
206    TableRow,
207    TableCell,
208
209    // span-level tags
210    Emphasis,
211    Strong,
212    Strikethrough,
213
214    /// A link.
215    Link {
216        link_type: LinkType,
217        dest_url: SharedString,
218        title: SharedString,
219        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
220        id: SharedString,
221    },
222
223    /// An image. The first field is the link type, the second the destination URL and the third is a title,
224    /// the fourth is the link identifier.
225    Image {
226        link_type: LinkType,
227        dest_url: SharedString,
228        title: SharedString,
229        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
230        id: SharedString,
231    },
232
233    /// A metadata block.
234    MetadataBlock(MetadataBlockKind),
235}
236
237#[derive(Clone, Debug, PartialEq)]
238pub enum CodeBlockKind {
239    Indented,
240    /// The value contained in the tag describes the language of the code, which may be empty.
241    Fenced(SharedString),
242}
243
244impl From<pulldown_cmark::Tag<'_>> for MarkdownTag {
245    fn from(tag: pulldown_cmark::Tag) -> Self {
246        match tag {
247            pulldown_cmark::Tag::Paragraph => MarkdownTag::Paragraph,
248            pulldown_cmark::Tag::Heading {
249                level,
250                id,
251                classes,
252                attrs,
253            } => {
254                let id = id.map(|id| SharedString::from(id.into_string()));
255                let classes = classes
256                    .into_iter()
257                    .map(|c| SharedString::from(c.into_string()))
258                    .collect();
259                let attrs = attrs
260                    .into_iter()
261                    .map(|(key, value)| {
262                        (
263                            SharedString::from(key.into_string()),
264                            value.map(|v| SharedString::from(v.into_string())),
265                        )
266                    })
267                    .collect();
268                MarkdownTag::Heading {
269                    level,
270                    id,
271                    classes,
272                    attrs,
273                }
274            }
275            pulldown_cmark::Tag::BlockQuote(_kind) => MarkdownTag::BlockQuote,
276            pulldown_cmark::Tag::CodeBlock(kind) => match kind {
277                pulldown_cmark::CodeBlockKind::Indented => {
278                    MarkdownTag::CodeBlock(CodeBlockKind::Indented)
279                }
280                pulldown_cmark::CodeBlockKind::Fenced(info) => MarkdownTag::CodeBlock(
281                    CodeBlockKind::Fenced(SharedString::from(info.into_string())),
282                ),
283            },
284            pulldown_cmark::Tag::List(start_number) => MarkdownTag::List(start_number),
285            pulldown_cmark::Tag::Item => MarkdownTag::Item,
286            pulldown_cmark::Tag::FootnoteDefinition(label) => {
287                MarkdownTag::FootnoteDefinition(SharedString::from(label.to_string()))
288            }
289            pulldown_cmark::Tag::Table(alignments) => MarkdownTag::Table(alignments),
290            pulldown_cmark::Tag::TableHead => MarkdownTag::TableHead,
291            pulldown_cmark::Tag::TableRow => MarkdownTag::TableRow,
292            pulldown_cmark::Tag::TableCell => MarkdownTag::TableCell,
293            pulldown_cmark::Tag::Emphasis => MarkdownTag::Emphasis,
294            pulldown_cmark::Tag::Strong => MarkdownTag::Strong,
295            pulldown_cmark::Tag::Strikethrough => MarkdownTag::Strikethrough,
296            pulldown_cmark::Tag::Link {
297                link_type,
298                dest_url,
299                title,
300                id,
301            } => MarkdownTag::Link {
302                link_type,
303                dest_url: SharedString::from(dest_url.into_string()),
304                title: SharedString::from(title.into_string()),
305                id: SharedString::from(id.into_string()),
306            },
307            pulldown_cmark::Tag::Image {
308                link_type,
309                dest_url,
310                title,
311                id,
312            } => MarkdownTag::Image {
313                link_type,
314                dest_url: SharedString::from(dest_url.into_string()),
315                title: SharedString::from(title.into_string()),
316                id: SharedString::from(id.into_string()),
317            },
318            pulldown_cmark::Tag::HtmlBlock => MarkdownTag::HtmlBlock,
319            pulldown_cmark::Tag::MetadataBlock(kind) => MarkdownTag::MetadataBlock(kind),
320            pulldown_cmark::Tag::DefinitionList
321            | pulldown_cmark::Tag::DefinitionListTitle
322            | pulldown_cmark::Tag::DefinitionListDefinition => {
323                unimplemented!("definition lists are not yet supported")
324            }
325        }
326    }
327}