1use gpui::SharedString;
2use linkify::LinkFinder;
3pub use pulldown_cmark::TagEnd as MarkdownTagEnd;
4use pulldown_cmark::{Alignment, HeadingLevel, LinkType, MetadataBlockKind, Options, Parser};
5use std::{collections::HashSet, ops::Range};
6
7const PARSE_OPTIONS: Options = Options::ENABLE_TABLES
8 .union(Options::ENABLE_FOOTNOTES)
9 .union(Options::ENABLE_STRIKETHROUGH)
10 .union(Options::ENABLE_TASKLISTS)
11 .union(Options::ENABLE_SMART_PUNCTUATION)
12 .union(Options::ENABLE_HEADING_ATTRIBUTES)
13 .union(Options::ENABLE_PLUSES_DELIMITED_METADATA_BLOCKS)
14 .union(Options::ENABLE_OLD_FOOTNOTES)
15 .union(Options::ENABLE_GFM);
16
17pub fn parse_markdown(text: &str) -> (Vec<(Range<usize>, MarkdownEvent)>, HashSet<SharedString>) {
18 let mut events = Vec::new();
19 let mut languages = HashSet::new();
20 let mut within_link = false;
21 let mut within_metadata = false;
22 for (pulldown_event, mut range) in Parser::new_ext(text, PARSE_OPTIONS).into_offset_iter() {
23 if within_metadata {
24 if let pulldown_cmark::Event::End(pulldown_cmark::TagEnd::MetadataBlock { .. }) =
25 pulldown_event
26 {
27 within_metadata = false;
28 }
29 continue;
30 }
31 match pulldown_event {
32 pulldown_cmark::Event::Start(tag) => {
33 match tag {
34 pulldown_cmark::Tag::Link { .. } => within_link = true,
35 pulldown_cmark::Tag::MetadataBlock { .. } => within_metadata = true,
36 pulldown_cmark::Tag::CodeBlock(pulldown_cmark::CodeBlockKind::Fenced(
37 ref language,
38 )) => {
39 languages.insert(SharedString::from(language.to_string()));
40 }
41 _ => {}
42 }
43 events.push((range, MarkdownEvent::Start(tag.into())))
44 }
45 pulldown_cmark::Event::End(tag) => {
46 if let pulldown_cmark::TagEnd::Link = tag {
47 within_link = false;
48 }
49 events.push((range, MarkdownEvent::End(tag)));
50 }
51 pulldown_cmark::Event::Text(parsed) => {
52 // Automatically detect links in text if we're not already within a markdown
53 // link.
54 let mut parsed = parsed.as_ref();
55 if !within_link {
56 let mut finder = LinkFinder::new();
57 finder.kinds(&[linkify::LinkKind::Url]);
58 let text_range = range.clone();
59 for link in finder.links(&text[text_range.clone()]) {
60 let link_range =
61 text_range.start + link.start()..text_range.start + link.end();
62
63 if link_range.start > range.start {
64 let (text, tail) = parsed.split_at(link_range.start - range.start);
65 events.push((
66 range.start..link_range.start,
67 MarkdownEvent::Text(SharedString::new(text)),
68 ));
69 parsed = tail;
70 }
71
72 events.push((
73 link_range.clone(),
74 MarkdownEvent::Start(MarkdownTag::Link {
75 link_type: LinkType::Autolink,
76 dest_url: SharedString::from(link.as_str().to_string()),
77 title: SharedString::default(),
78 id: SharedString::default(),
79 }),
80 ));
81
82 let (link_text, tail) = parsed.split_at(link_range.end - link_range.start);
83 events.push((
84 link_range.clone(),
85 MarkdownEvent::Text(SharedString::new(link_text)),
86 ));
87 events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
88
89 range.start = link_range.end;
90 parsed = tail;
91 }
92 }
93 if range.start < range.end {
94 events.push((range, MarkdownEvent::Text(SharedString::new(parsed))));
95 }
96 }
97 pulldown_cmark::Event::Code(_) => {
98 range.start += 1;
99 range.end -= 1;
100 events.push((range, MarkdownEvent::Code))
101 }
102 pulldown_cmark::Event::Html(_) => events.push((range, MarkdownEvent::Html)),
103 pulldown_cmark::Event::InlineHtml(_) => events.push((range, MarkdownEvent::InlineHtml)),
104 pulldown_cmark::Event::FootnoteReference(_) => {
105 events.push((range, MarkdownEvent::FootnoteReference))
106 }
107 pulldown_cmark::Event::SoftBreak => events.push((range, MarkdownEvent::SoftBreak)),
108 pulldown_cmark::Event::HardBreak => events.push((range, MarkdownEvent::HardBreak)),
109 pulldown_cmark::Event::Rule => events.push((range, MarkdownEvent::Rule)),
110 pulldown_cmark::Event::TaskListMarker(checked) => {
111 events.push((range, MarkdownEvent::TaskListMarker(checked)))
112 }
113 pulldown_cmark::Event::InlineMath(_) | pulldown_cmark::Event::DisplayMath(_) => {}
114 }
115 }
116 (events, languages)
117}
118
119pub fn parse_links_only(mut text: &str) -> Vec<(Range<usize>, MarkdownEvent)> {
120 let mut events = Vec::new();
121 let mut finder = LinkFinder::new();
122 finder.kinds(&[linkify::LinkKind::Url]);
123 let mut text_range = Range {
124 start: 0,
125 end: text.len(),
126 };
127 for link in finder.links(text) {
128 let link_range = link.start()..link.end();
129
130 if link_range.start > text_range.start {
131 let (head, tail) = text.split_at(link_range.start - text_range.start);
132 events.push((
133 text_range.start..link_range.start,
134 MarkdownEvent::Text(SharedString::new(head)),
135 ));
136 text = tail;
137 }
138
139 let (link_text, tail) = text.split_at(link_range.end - link_range.start);
140 events.push((
141 link_range.clone(),
142 MarkdownEvent::Start(MarkdownTag::Link {
143 link_type: LinkType::Autolink,
144 dest_url: SharedString::from(link.as_str().to_string()),
145 title: SharedString::default(),
146 id: SharedString::default(),
147 }),
148 ));
149 events.push((
150 link_range.clone(),
151 MarkdownEvent::Text(SharedString::new(link_text)),
152 ));
153 events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
154
155 text_range.start = link_range.end;
156 text = tail;
157 }
158
159 if text_range.end > text_range.start {
160 events.push((text_range, MarkdownEvent::Text(SharedString::new(text))));
161 }
162
163 events
164}
165
166/// A static-lifetime equivalent of pulldown_cmark::Event so we can cache the
167/// parse result for rendering without resorting to unsafe lifetime coercion.
168#[derive(Clone, Debug, PartialEq)]
169pub enum MarkdownEvent {
170 /// Start of a tagged element. Events that are yielded after this event
171 /// and before its corresponding `End` event are inside this element.
172 /// Start and end events are guaranteed to be balanced.
173 Start(MarkdownTag),
174 /// End of a tagged element.
175 End(MarkdownTagEnd),
176 /// A text node.
177 Text(SharedString),
178 /// An inline code node.
179 Code,
180 /// An HTML node.
181 Html,
182 /// An inline HTML node.
183 InlineHtml,
184 /// A reference to a footnote with given label, which may or may not be defined
185 /// by an event with a `Tag::FootnoteDefinition` tag. Definitions and references to them may
186 /// occur in any order.
187 FootnoteReference,
188 /// A soft line break.
189 SoftBreak,
190 /// A hard line break.
191 HardBreak,
192 /// A horizontal ruler.
193 Rule,
194 /// A task list marker, rendered as a checkbox in HTML. Contains a true when it is checked.
195 TaskListMarker(bool),
196}
197
198/// Tags for elements that can contain other elements.
199#[derive(Clone, Debug, PartialEq)]
200pub enum MarkdownTag {
201 /// A paragraph of text and other inline elements.
202 Paragraph,
203
204 /// A heading, with optional identifier, classes and custom attributes.
205 /// The identifier is prefixed with `#` and the last one in the attributes
206 /// list is chosen, classes are prefixed with `.` and custom attributes
207 /// have no prefix and can optionally have a value (`myattr` o `myattr=myvalue`).
208 Heading {
209 level: HeadingLevel,
210 id: Option<SharedString>,
211 classes: Vec<SharedString>,
212 /// The first item of the tuple is the attr and second one the value.
213 attrs: Vec<(SharedString, Option<SharedString>)>,
214 },
215
216 BlockQuote,
217
218 /// A code block.
219 CodeBlock(CodeBlockKind),
220
221 /// A HTML block.
222 HtmlBlock,
223
224 /// A list. If the list is ordered the field indicates the number of the first item.
225 /// Contains only list items.
226 List(Option<u64>), // TODO: add delim and tight for ast (not needed for html)
227
228 /// A list item.
229 Item,
230
231 /// A footnote definition. The value contained is the footnote's label by which it can
232 /// be referred to.
233 FootnoteDefinition(SharedString),
234
235 /// A table. Contains a vector describing the text-alignment for each of its columns.
236 Table(Vec<Alignment>),
237
238 /// A table header. Contains only `TableCell`s. Note that the table body starts immediately
239 /// after the closure of the `TableHead` tag. There is no `TableBody` tag.
240 TableHead,
241
242 /// A table row. Is used both for header rows as body rows. Contains only `TableCell`s.
243 TableRow,
244 TableCell,
245
246 // span-level tags
247 Emphasis,
248 Strong,
249 Strikethrough,
250
251 /// A link.
252 Link {
253 link_type: LinkType,
254 dest_url: SharedString,
255 title: SharedString,
256 /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
257 id: SharedString,
258 },
259
260 /// An image. The first field is the link type, the second the destination URL and the third is a title,
261 /// the fourth is the link identifier.
262 Image {
263 link_type: LinkType,
264 dest_url: SharedString,
265 title: SharedString,
266 /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
267 id: SharedString,
268 },
269
270 /// A metadata block.
271 MetadataBlock(MetadataBlockKind),
272
273 DefinitionList,
274 DefinitionListTitle,
275 DefinitionListDefinition,
276}
277
278#[derive(Clone, Debug, PartialEq)]
279pub enum CodeBlockKind {
280 Indented,
281 /// The value contained in the tag describes the language of the code, which may be empty.
282 Fenced(SharedString),
283}
284
285impl From<pulldown_cmark::Tag<'_>> for MarkdownTag {
286 fn from(tag: pulldown_cmark::Tag) -> Self {
287 match tag {
288 pulldown_cmark::Tag::Paragraph => MarkdownTag::Paragraph,
289 pulldown_cmark::Tag::Heading {
290 level,
291 id,
292 classes,
293 attrs,
294 } => {
295 let id = id.map(|id| SharedString::from(id.into_string()));
296 let classes = classes
297 .into_iter()
298 .map(|c| SharedString::from(c.into_string()))
299 .collect();
300 let attrs = attrs
301 .into_iter()
302 .map(|(key, value)| {
303 (
304 SharedString::from(key.into_string()),
305 value.map(|v| SharedString::from(v.into_string())),
306 )
307 })
308 .collect();
309 MarkdownTag::Heading {
310 level,
311 id,
312 classes,
313 attrs,
314 }
315 }
316 pulldown_cmark::Tag::BlockQuote(_kind) => MarkdownTag::BlockQuote,
317 pulldown_cmark::Tag::CodeBlock(kind) => match kind {
318 pulldown_cmark::CodeBlockKind::Indented => {
319 MarkdownTag::CodeBlock(CodeBlockKind::Indented)
320 }
321 pulldown_cmark::CodeBlockKind::Fenced(info) => MarkdownTag::CodeBlock(
322 CodeBlockKind::Fenced(SharedString::from(info.into_string())),
323 ),
324 },
325 pulldown_cmark::Tag::List(start_number) => MarkdownTag::List(start_number),
326 pulldown_cmark::Tag::Item => MarkdownTag::Item,
327 pulldown_cmark::Tag::FootnoteDefinition(label) => {
328 MarkdownTag::FootnoteDefinition(SharedString::from(label.to_string()))
329 }
330 pulldown_cmark::Tag::Table(alignments) => MarkdownTag::Table(alignments),
331 pulldown_cmark::Tag::TableHead => MarkdownTag::TableHead,
332 pulldown_cmark::Tag::TableRow => MarkdownTag::TableRow,
333 pulldown_cmark::Tag::TableCell => MarkdownTag::TableCell,
334 pulldown_cmark::Tag::Emphasis => MarkdownTag::Emphasis,
335 pulldown_cmark::Tag::Strong => MarkdownTag::Strong,
336 pulldown_cmark::Tag::Strikethrough => MarkdownTag::Strikethrough,
337 pulldown_cmark::Tag::Link {
338 link_type,
339 dest_url,
340 title,
341 id,
342 } => MarkdownTag::Link {
343 link_type,
344 dest_url: SharedString::from(dest_url.into_string()),
345 title: SharedString::from(title.into_string()),
346 id: SharedString::from(id.into_string()),
347 },
348 pulldown_cmark::Tag::Image {
349 link_type,
350 dest_url,
351 title,
352 id,
353 } => MarkdownTag::Image {
354 link_type,
355 dest_url: SharedString::from(dest_url.into_string()),
356 title: SharedString::from(title.into_string()),
357 id: SharedString::from(id.into_string()),
358 },
359 pulldown_cmark::Tag::HtmlBlock => MarkdownTag::HtmlBlock,
360 pulldown_cmark::Tag::MetadataBlock(kind) => MarkdownTag::MetadataBlock(kind),
361 pulldown_cmark::Tag::DefinitionList => MarkdownTag::DefinitionList,
362 pulldown_cmark::Tag::DefinitionListTitle => MarkdownTag::DefinitionListTitle,
363 pulldown_cmark::Tag::DefinitionListDefinition => MarkdownTag::DefinitionListDefinition,
364 }
365 }
366}
367
#[cfg(test)]
mod tests {
    use super::*;

    /// Options that were evaluated and deliberately left out of `PARSE_OPTIONS`.
    const UNWANTED_OPTIONS: Options = Options::ENABLE_DEFINITION_LIST
        .union(Options::ENABLE_MATH)
        .union(Options::ENABLE_YAML_STYLE_METADATA_BLOCKS);

    #[test]
    fn all_options_considered() {
        // An option newly added to pulldown_cmark is in neither set, so this equality breaks
        // and the option has to be evaluated for inclusion.
        let considered = PARSE_OPTIONS | UNWANTED_OPTIONS;
        assert_eq!(considered, Options::all());
    }

    #[test]
    fn wanted_and_unwanted_options_disjoint() {
        // No option may be listed as both wanted and unwanted.
        let overlap = PARSE_OPTIONS & UNWANTED_OPTIONS;
        assert_eq!(overlap, Options::empty());
    }
}