parser.rs

  1use gpui::SharedString;
  2use linkify::LinkFinder;
  3pub use pulldown_cmark::TagEnd as MarkdownTagEnd;
  4use pulldown_cmark::{Alignment, HeadingLevel, LinkType, MetadataBlockKind, Options, Parser};
  5use std::ops::Range;
  6
  7pub fn parse_markdown(text: &str) -> Vec<(Range<usize>, MarkdownEvent)> {
  8    let mut events = Vec::new();
  9    let mut within_link = false;
 10    let mut within_metadata = false;
 11    for (pulldown_event, mut range) in Parser::new_ext(text, Options::all()).into_offset_iter() {
 12        if within_metadata {
 13            if let pulldown_cmark::Event::End(pulldown_cmark::TagEnd::MetadataBlock { .. }) =
 14                pulldown_event
 15            {
 16                within_metadata = false;
 17            }
 18            continue;
 19        }
 20        match pulldown_event {
 21            pulldown_cmark::Event::Start(tag) => {
 22                match tag {
 23                    pulldown_cmark::Tag::Link { .. } => within_link = true,
 24                    pulldown_cmark::Tag::MetadataBlock { .. } => within_metadata = true,
 25                    _ => {}
 26                }
 27                events.push((range, MarkdownEvent::Start(tag.into())))
 28            }
 29            pulldown_cmark::Event::End(tag) => {
 30                if let pulldown_cmark::TagEnd::Link = tag {
 31                    within_link = false;
 32                }
 33                events.push((range, MarkdownEvent::End(tag)));
 34            }
 35            pulldown_cmark::Event::Text(_) => {
 36                // Automatically detect links in text if we're not already within a markdown
 37                // link.
 38                if !within_link {
 39                    let mut finder = LinkFinder::new();
 40                    finder.kinds(&[linkify::LinkKind::Url]);
 41                    let text_range = range.clone();
 42                    for link in finder.links(&text[text_range.clone()]) {
 43                        let link_range =
 44                            text_range.start + link.start()..text_range.start + link.end();
 45
 46                        if link_range.start > range.start {
 47                            events.push((range.start..link_range.start, MarkdownEvent::Text));
 48                        }
 49
 50                        events.push((
 51                            link_range.clone(),
 52                            MarkdownEvent::Start(MarkdownTag::Link {
 53                                link_type: LinkType::Autolink,
 54                                dest_url: SharedString::from(link.as_str().to_string()),
 55                                title: SharedString::default(),
 56                                id: SharedString::default(),
 57                            }),
 58                        ));
 59                        events.push((link_range.clone(), MarkdownEvent::Text));
 60                        events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
 61
 62                        range.start = link_range.end;
 63                    }
 64                }
 65
 66                if range.start < range.end {
 67                    events.push((range, MarkdownEvent::Text));
 68                }
 69            }
 70            pulldown_cmark::Event::Code(_) => {
 71                range.start += 1;
 72                range.end -= 1;
 73                events.push((range, MarkdownEvent::Code))
 74            }
 75            pulldown_cmark::Event::Html(_) => events.push((range, MarkdownEvent::Html)),
 76            pulldown_cmark::Event::InlineHtml(_) => events.push((range, MarkdownEvent::InlineHtml)),
 77            pulldown_cmark::Event::FootnoteReference(_) => {
 78                events.push((range, MarkdownEvent::FootnoteReference))
 79            }
 80            pulldown_cmark::Event::SoftBreak => events.push((range, MarkdownEvent::SoftBreak)),
 81            pulldown_cmark::Event::HardBreak => events.push((range, MarkdownEvent::HardBreak)),
 82            pulldown_cmark::Event::Rule => events.push((range, MarkdownEvent::Rule)),
 83            pulldown_cmark::Event::TaskListMarker(checked) => {
 84                events.push((range, MarkdownEvent::TaskListMarker(checked)))
 85            }
 86        }
 87    }
 88    events
 89}
 90
 91pub fn parse_links_only(text: &str) -> Vec<(Range<usize>, MarkdownEvent)> {
 92    let mut events = Vec::new();
 93    let mut finder = LinkFinder::new();
 94    finder.kinds(&[linkify::LinkKind::Url]);
 95    let mut text_range = Range {
 96        start: 0,
 97        end: text.len(),
 98    };
 99    for link in finder.links(&text[text_range.clone()]) {
100        let link_range = text_range.start + link.start()..text_range.start + link.end();
101
102        if link_range.start > text_range.start {
103            events.push((text_range.start..link_range.start, MarkdownEvent::Text));
104        }
105
106        events.push((
107            link_range.clone(),
108            MarkdownEvent::Start(MarkdownTag::Link {
109                link_type: LinkType::Autolink,
110                dest_url: SharedString::from(link.as_str().to_string()),
111                title: SharedString::default(),
112                id: SharedString::default(),
113            }),
114        ));
115        events.push((link_range.clone(), MarkdownEvent::Text));
116        events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
117
118        text_range.start = link_range.end;
119    }
120
121    events.push((text_range, MarkdownEvent::Text));
122
123    events
124}
125
/// A static-lifetime equivalent of pulldown_cmark::Event so we can cache the
/// parse result for rendering without resorting to unsafe lifetime coercion.
///
/// The leaf variants (`Text`, `Code`, `Html`, ...) carry no text of their own:
/// `parse_markdown` and `parse_links_only` return every event paired with the
/// `Range<usize>` of the source text it covers.
#[derive(Clone, Debug, PartialEq)]
pub enum MarkdownEvent {
    /// Start of a tagged element. Events that are yielded after this event
    /// and before its corresponding `End` event are inside this element.
    /// Start and end events are guaranteed to be balanced.
    ///
    /// NOTE(review): `parse_markdown` pushes the `Start` of a metadata block
    /// but skips its matching `End`, so that tag is currently an exception to
    /// the balance guarantee — confirm whether this is intended.
    Start(MarkdownTag),
    /// End of a tagged element.
    End(MarkdownTagEnd),
    /// A text node.
    Text,
    /// An inline code node.
    Code,
    /// An HTML node.
    Html,
    /// An inline HTML node.
    InlineHtml,
    /// A reference to a footnote with given label, which may or may not be defined
    /// by an event with a `Tag::FootnoteDefinition` tag. Definitions and references to them may
    /// occur in any order. The label itself is not stored in this variant.
    FootnoteReference,
    /// A soft line break.
    SoftBreak,
    /// A hard line break.
    HardBreak,
    /// A horizontal ruler.
    Rule,
    /// A task list marker, rendered as a checkbox in HTML. Contains a true when it is checked.
    TaskListMarker(bool),
}
157
/// Tags for elements that can contain other elements.
///
/// This is the owned counterpart of `pulldown_cmark::Tag`: string payloads are
/// stored as `SharedString`s (see the `From<pulldown_cmark::Tag>` impl).
#[derive(Clone, Debug, PartialEq)]
pub enum MarkdownTag {
    /// A paragraph of text and other inline elements.
    Paragraph,

    /// A heading, with optional identifier, classes and custom attributes.
    /// The identifier is prefixed with `#` and the last one in the attributes
    /// list is chosen, classes are prefixed with `.` and custom attributes
    /// have no prefix and can optionally have a value (`myattr` or `myattr=myvalue`).
    Heading {
        level: HeadingLevel,
        id: Option<SharedString>,
        classes: Vec<SharedString>,
        /// The first item of the tuple is the attr and second one the value.
        attrs: Vec<(SharedString, Option<SharedString>)>,
    },

    /// A block quote.
    BlockQuote,

    /// A code block.
    CodeBlock(CodeBlockKind),

    /// An HTML block.
    HtmlBlock,

    /// A list. If the list is ordered the field indicates the number of the first item.
    /// Contains only list items.
    List(Option<u64>), // TODO: add delim and tight for ast (not needed for html)

    /// A list item.
    Item,

    /// A footnote definition. The value contained is the footnote's label by which it can
    /// be referred to.
    FootnoteDefinition(SharedString),

    /// A table. Contains a vector describing the text-alignment for each of its columns.
    Table(Vec<Alignment>),

    /// A table header. Contains only `TableCell`s. Note that the table body starts immediately
    /// after the closure of the `TableHead` tag. There is no `TableBody` tag.
    TableHead,

    /// A table row. Used for both header rows and body rows. Contains only `TableCell`s.
    TableRow,
    /// A table cell.
    TableCell,

    // span-level tags
    /// An emphasis span (converted from `pulldown_cmark::Tag::Emphasis`).
    Emphasis,
    /// A strong span (converted from `pulldown_cmark::Tag::Strong`).
    Strong,
    /// A strikethrough span (converted from `pulldown_cmark::Tag::Strikethrough`).
    Strikethrough,

    /// A link.
    Link {
        link_type: LinkType,
        dest_url: SharedString,
        title: SharedString,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: SharedString,
    },

    /// An image. Carries the link type, the destination URL, a title and the
    /// link identifier.
    Image {
        link_type: LinkType,
        dest_url: SharedString,
        title: SharedString,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: SharedString,
    },

    /// A metadata block.
    MetadataBlock(MetadataBlockKind),
}
233
/// The flavor of a code block: indented or fenced.
///
/// Owned counterpart of `pulldown_cmark::CodeBlockKind`; the fence's info
/// string is stored as a `SharedString`.
#[derive(Clone, Debug, PartialEq)]
pub enum CodeBlockKind {
    /// An indented code block; carries no extra information.
    Indented,
    /// The value contained in the tag describes the language of the code, which may be empty.
    Fenced(SharedString),
}
240
241impl From<pulldown_cmark::Tag<'_>> for MarkdownTag {
242    fn from(tag: pulldown_cmark::Tag) -> Self {
243        match tag {
244            pulldown_cmark::Tag::Paragraph => MarkdownTag::Paragraph,
245            pulldown_cmark::Tag::Heading {
246                level,
247                id,
248                classes,
249                attrs,
250            } => {
251                let id = id.map(|id| SharedString::from(id.into_string()));
252                let classes = classes
253                    .into_iter()
254                    .map(|c| SharedString::from(c.into_string()))
255                    .collect();
256                let attrs = attrs
257                    .into_iter()
258                    .map(|(key, value)| {
259                        (
260                            SharedString::from(key.into_string()),
261                            value.map(|v| SharedString::from(v.into_string())),
262                        )
263                    })
264                    .collect();
265                MarkdownTag::Heading {
266                    level,
267                    id,
268                    classes,
269                    attrs,
270                }
271            }
272            pulldown_cmark::Tag::BlockQuote => MarkdownTag::BlockQuote,
273            pulldown_cmark::Tag::CodeBlock(kind) => match kind {
274                pulldown_cmark::CodeBlockKind::Indented => {
275                    MarkdownTag::CodeBlock(CodeBlockKind::Indented)
276                }
277                pulldown_cmark::CodeBlockKind::Fenced(info) => MarkdownTag::CodeBlock(
278                    CodeBlockKind::Fenced(SharedString::from(info.into_string())),
279                ),
280            },
281            pulldown_cmark::Tag::List(start_number) => MarkdownTag::List(start_number),
282            pulldown_cmark::Tag::Item => MarkdownTag::Item,
283            pulldown_cmark::Tag::FootnoteDefinition(label) => {
284                MarkdownTag::FootnoteDefinition(SharedString::from(label.to_string()))
285            }
286            pulldown_cmark::Tag::Table(alignments) => MarkdownTag::Table(alignments),
287            pulldown_cmark::Tag::TableHead => MarkdownTag::TableHead,
288            pulldown_cmark::Tag::TableRow => MarkdownTag::TableRow,
289            pulldown_cmark::Tag::TableCell => MarkdownTag::TableCell,
290            pulldown_cmark::Tag::Emphasis => MarkdownTag::Emphasis,
291            pulldown_cmark::Tag::Strong => MarkdownTag::Strong,
292            pulldown_cmark::Tag::Strikethrough => MarkdownTag::Strikethrough,
293            pulldown_cmark::Tag::Link {
294                link_type,
295                dest_url,
296                title,
297                id,
298            } => MarkdownTag::Link {
299                link_type,
300                dest_url: SharedString::from(dest_url.into_string()),
301                title: SharedString::from(title.into_string()),
302                id: SharedString::from(id.into_string()),
303            },
304            pulldown_cmark::Tag::Image {
305                link_type,
306                dest_url,
307                title,
308                id,
309            } => MarkdownTag::Image {
310                link_type,
311                dest_url: SharedString::from(dest_url.into_string()),
312                title: SharedString::from(title.into_string()),
313                id: SharedString::from(id.into_string()),
314            },
315            pulldown_cmark::Tag::HtmlBlock => MarkdownTag::HtmlBlock,
316            pulldown_cmark::Tag::MetadataBlock(kind) => MarkdownTag::MetadataBlock(kind),
317        }
318    }
319}