parser.rs

  1use gpui::SharedString;
  2use linkify::LinkFinder;
  3pub use pulldown_cmark::TagEnd as MarkdownTagEnd;
  4use pulldown_cmark::{Alignment, HeadingLevel, LinkType, MetadataBlockKind, Options, Parser};
  5use std::ops::Range;
  6
  7pub fn parse_markdown(text: &str) -> Vec<(Range<usize>, MarkdownEvent)> {
  8    let mut options = Options::all();
  9    options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
 10
 11    let mut events = Vec::new();
 12    let mut within_link = false;
 13    let mut within_metadata = false;
 14    for (pulldown_event, mut range) in Parser::new_ext(text, options).into_offset_iter() {
 15        if within_metadata {
 16            if let pulldown_cmark::Event::End(pulldown_cmark::TagEnd::MetadataBlock { .. }) =
 17                pulldown_event
 18            {
 19                within_metadata = false;
 20            }
 21            continue;
 22        }
 23        match pulldown_event {
 24            pulldown_cmark::Event::Start(tag) => {
 25                match tag {
 26                    pulldown_cmark::Tag::Link { .. } => within_link = true,
 27                    pulldown_cmark::Tag::MetadataBlock { .. } => within_metadata = true,
 28                    _ => {}
 29                }
 30                events.push((range, MarkdownEvent::Start(tag.into())))
 31            }
 32            pulldown_cmark::Event::End(tag) => {
 33                if let pulldown_cmark::TagEnd::Link = tag {
 34                    within_link = false;
 35                }
 36                events.push((range, MarkdownEvent::End(tag)));
 37            }
 38            pulldown_cmark::Event::Text(_) => {
 39                // Automatically detect links in text if we're not already within a markdown
 40                // link.
 41                if !within_link {
 42                    let mut finder = LinkFinder::new();
 43                    finder.kinds(&[linkify::LinkKind::Url]);
 44                    let text_range = range.clone();
 45                    for link in finder.links(&text[text_range.clone()]) {
 46                        let link_range =
 47                            text_range.start + link.start()..text_range.start + link.end();
 48
 49                        if link_range.start > range.start {
 50                            events.push((range.start..link_range.start, MarkdownEvent::Text));
 51                        }
 52
 53                        events.push((
 54                            link_range.clone(),
 55                            MarkdownEvent::Start(MarkdownTag::Link {
 56                                link_type: LinkType::Autolink,
 57                                dest_url: SharedString::from(link.as_str().to_string()),
 58                                title: SharedString::default(),
 59                                id: SharedString::default(),
 60                            }),
 61                        ));
 62                        events.push((link_range.clone(), MarkdownEvent::Text));
 63                        events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
 64
 65                        range.start = link_range.end;
 66                    }
 67                }
 68
 69                if range.start < range.end {
 70                    events.push((range, MarkdownEvent::Text));
 71                }
 72            }
 73            pulldown_cmark::Event::Code(_) => {
 74                range.start += 1;
 75                range.end -= 1;
 76                events.push((range, MarkdownEvent::Code))
 77            }
 78            pulldown_cmark::Event::Html(_) => events.push((range, MarkdownEvent::Html)),
 79            pulldown_cmark::Event::InlineHtml(_) => events.push((range, MarkdownEvent::InlineHtml)),
 80            pulldown_cmark::Event::FootnoteReference(_) => {
 81                events.push((range, MarkdownEvent::FootnoteReference))
 82            }
 83            pulldown_cmark::Event::SoftBreak => events.push((range, MarkdownEvent::SoftBreak)),
 84            pulldown_cmark::Event::HardBreak => events.push((range, MarkdownEvent::HardBreak)),
 85            pulldown_cmark::Event::Rule => events.push((range, MarkdownEvent::Rule)),
 86            pulldown_cmark::Event::TaskListMarker(checked) => {
 87                events.push((range, MarkdownEvent::TaskListMarker(checked)))
 88            }
 89            pulldown_cmark::Event::InlineMath(_) | pulldown_cmark::Event::DisplayMath(_) => {}
 90        }
 91    }
 92    events
 93}
 94
 95pub fn parse_links_only(text: &str) -> Vec<(Range<usize>, MarkdownEvent)> {
 96    let mut events = Vec::new();
 97    let mut finder = LinkFinder::new();
 98    finder.kinds(&[linkify::LinkKind::Url]);
 99    let mut text_range = Range {
100        start: 0,
101        end: text.len(),
102    };
103    for link in finder.links(text) {
104        let link_range = link.start()..link.end();
105
106        if link_range.start > text_range.start {
107            events.push((text_range.start..link_range.start, MarkdownEvent::Text));
108        }
109
110        events.push((
111            link_range.clone(),
112            MarkdownEvent::Start(MarkdownTag::Link {
113                link_type: LinkType::Autolink,
114                dest_url: SharedString::from(link.as_str().to_string()),
115                title: SharedString::default(),
116                id: SharedString::default(),
117            }),
118        ));
119        events.push((link_range.clone(), MarkdownEvent::Text));
120        events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
121
122        text_range.start = link_range.end;
123    }
124
125    if text_range.end > text_range.start {
126        events.push((text_range, MarkdownEvent::Text));
127    }
128
129    events
130}
131
132/// A static-lifetime equivalent of pulldown_cmark::Event so we can cache the
133/// parse result for rendering without resorting to unsafe lifetime coercion.
134#[derive(Clone, Debug, PartialEq)]
135pub enum MarkdownEvent {
136    /// Start of a tagged element. Events that are yielded after this event
137    /// and before its corresponding `End` event are inside this element.
138    /// Start and end events are guaranteed to be balanced.
139    Start(MarkdownTag),
140    /// End of a tagged element.
141    End(MarkdownTagEnd),
142    /// A text node.
143    Text,
144    /// An inline code node.
145    Code,
146    /// An HTML node.
147    Html,
148    /// An inline HTML node.
149    InlineHtml,
150    /// A reference to a footnote with given label, which may or may not be defined
151    /// by an event with a `Tag::FootnoteDefinition` tag. Definitions and references to them may
152    /// occur in any order.
153    FootnoteReference,
154    /// A soft line break.
155    SoftBreak,
156    /// A hard line break.
157    HardBreak,
158    /// A horizontal ruler.
159    Rule,
160    /// A task list marker, rendered as a checkbox in HTML. Contains a true when it is checked.
161    TaskListMarker(bool),
162}
163
164/// Tags for elements that can contain other elements.
165#[derive(Clone, Debug, PartialEq)]
166pub enum MarkdownTag {
167    /// A paragraph of text and other inline elements.
168    Paragraph,
169
170    /// A heading, with optional identifier, classes and custom attributes.
171    /// The identifier is prefixed with `#` and the last one in the attributes
172    /// list is chosen, classes are prefixed with `.` and custom attributes
173    /// have no prefix and can optionally have a value (`myattr` o `myattr=myvalue`).
174    Heading {
175        level: HeadingLevel,
176        id: Option<SharedString>,
177        classes: Vec<SharedString>,
178        /// The first item of the tuple is the attr and second one the value.
179        attrs: Vec<(SharedString, Option<SharedString>)>,
180    },
181
182    BlockQuote,
183
184    /// A code block.
185    CodeBlock(CodeBlockKind),
186
187    /// A HTML block.
188    HtmlBlock,
189
190    /// A list. If the list is ordered the field indicates the number of the first item.
191    /// Contains only list items.
192    List(Option<u64>), // TODO: add delim and tight for ast (not needed for html)
193
194    /// A list item.
195    Item,
196
197    /// A footnote definition. The value contained is the footnote's label by which it can
198    /// be referred to.
199    FootnoteDefinition(SharedString),
200
201    /// A table. Contains a vector describing the text-alignment for each of its columns.
202    Table(Vec<Alignment>),
203
204    /// A table header. Contains only `TableCell`s. Note that the table body starts immediately
205    /// after the closure of the `TableHead` tag. There is no `TableBody` tag.
206    TableHead,
207
208    /// A table row. Is used both for header rows as body rows. Contains only `TableCell`s.
209    TableRow,
210    TableCell,
211
212    // span-level tags
213    Emphasis,
214    Strong,
215    Strikethrough,
216
217    /// A link.
218    Link {
219        link_type: LinkType,
220        dest_url: SharedString,
221        title: SharedString,
222        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
223        id: SharedString,
224    },
225
226    /// An image. The first field is the link type, the second the destination URL and the third is a title,
227    /// the fourth is the link identifier.
228    Image {
229        link_type: LinkType,
230        dest_url: SharedString,
231        title: SharedString,
232        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
233        id: SharedString,
234    },
235
236    /// A metadata block.
237    MetadataBlock(MetadataBlockKind),
238
239    DefinitionList,
240    DefinitionListTitle,
241    DefinitionListDefinition,
242}
243
244#[derive(Clone, Debug, PartialEq)]
245pub enum CodeBlockKind {
246    Indented,
247    /// The value contained in the tag describes the language of the code, which may be empty.
248    Fenced(SharedString),
249}
250
251impl From<pulldown_cmark::Tag<'_>> for MarkdownTag {
252    fn from(tag: pulldown_cmark::Tag) -> Self {
253        match tag {
254            pulldown_cmark::Tag::Paragraph => MarkdownTag::Paragraph,
255            pulldown_cmark::Tag::Heading {
256                level,
257                id,
258                classes,
259                attrs,
260            } => {
261                let id = id.map(|id| SharedString::from(id.into_string()));
262                let classes = classes
263                    .into_iter()
264                    .map(|c| SharedString::from(c.into_string()))
265                    .collect();
266                let attrs = attrs
267                    .into_iter()
268                    .map(|(key, value)| {
269                        (
270                            SharedString::from(key.into_string()),
271                            value.map(|v| SharedString::from(v.into_string())),
272                        )
273                    })
274                    .collect();
275                MarkdownTag::Heading {
276                    level,
277                    id,
278                    classes,
279                    attrs,
280                }
281            }
282            pulldown_cmark::Tag::BlockQuote(_kind) => MarkdownTag::BlockQuote,
283            pulldown_cmark::Tag::CodeBlock(kind) => match kind {
284                pulldown_cmark::CodeBlockKind::Indented => {
285                    MarkdownTag::CodeBlock(CodeBlockKind::Indented)
286                }
287                pulldown_cmark::CodeBlockKind::Fenced(info) => MarkdownTag::CodeBlock(
288                    CodeBlockKind::Fenced(SharedString::from(info.into_string())),
289                ),
290            },
291            pulldown_cmark::Tag::List(start_number) => MarkdownTag::List(start_number),
292            pulldown_cmark::Tag::Item => MarkdownTag::Item,
293            pulldown_cmark::Tag::FootnoteDefinition(label) => {
294                MarkdownTag::FootnoteDefinition(SharedString::from(label.to_string()))
295            }
296            pulldown_cmark::Tag::Table(alignments) => MarkdownTag::Table(alignments),
297            pulldown_cmark::Tag::TableHead => MarkdownTag::TableHead,
298            pulldown_cmark::Tag::TableRow => MarkdownTag::TableRow,
299            pulldown_cmark::Tag::TableCell => MarkdownTag::TableCell,
300            pulldown_cmark::Tag::Emphasis => MarkdownTag::Emphasis,
301            pulldown_cmark::Tag::Strong => MarkdownTag::Strong,
302            pulldown_cmark::Tag::Strikethrough => MarkdownTag::Strikethrough,
303            pulldown_cmark::Tag::Link {
304                link_type,
305                dest_url,
306                title,
307                id,
308            } => MarkdownTag::Link {
309                link_type,
310                dest_url: SharedString::from(dest_url.into_string()),
311                title: SharedString::from(title.into_string()),
312                id: SharedString::from(id.into_string()),
313            },
314            pulldown_cmark::Tag::Image {
315                link_type,
316                dest_url,
317                title,
318                id,
319            } => MarkdownTag::Image {
320                link_type,
321                dest_url: SharedString::from(dest_url.into_string()),
322                title: SharedString::from(title.into_string()),
323                id: SharedString::from(id.into_string()),
324            },
325            pulldown_cmark::Tag::HtmlBlock => MarkdownTag::HtmlBlock,
326            pulldown_cmark::Tag::MetadataBlock(kind) => MarkdownTag::MetadataBlock(kind),
327            pulldown_cmark::Tag::DefinitionList => MarkdownTag::DefinitionList,
328            pulldown_cmark::Tag::DefinitionListTitle => MarkdownTag::DefinitionListTitle,
329            pulldown_cmark::Tag::DefinitionListDefinition => MarkdownTag::DefinitionListDefinition,
330        }
331    }
332}