1use gpui::SharedString;
2use linkify::LinkFinder;
3pub use pulldown_cmark::TagEnd as MarkdownTagEnd;
4use pulldown_cmark::{Alignment, HeadingLevel, LinkType, MetadataBlockKind, Options, Parser};
5use std::ops::Range;
6
7pub fn parse_markdown(text: &str) -> Vec<(Range<usize>, MarkdownEvent)> {
8 let mut options = Options::all();
9 options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
10 options.remove(pulldown_cmark::Options::ENABLE_YAML_STYLE_METADATA_BLOCKS);
11 options.remove(pulldown_cmark::Options::ENABLE_MATH);
12
13 let mut events = Vec::new();
14 let mut within_link = false;
15 let mut within_metadata = false;
16 for (pulldown_event, mut range) in Parser::new_ext(text, options).into_offset_iter() {
17 if within_metadata {
18 if let pulldown_cmark::Event::End(pulldown_cmark::TagEnd::MetadataBlock { .. }) =
19 pulldown_event
20 {
21 within_metadata = false;
22 }
23 continue;
24 }
25 match pulldown_event {
26 pulldown_cmark::Event::Start(tag) => {
27 match tag {
28 pulldown_cmark::Tag::Link { .. } => within_link = true,
29 pulldown_cmark::Tag::MetadataBlock { .. } => within_metadata = true,
30 _ => {}
31 }
32 events.push((range, MarkdownEvent::Start(tag.into())))
33 }
34 pulldown_cmark::Event::End(tag) => {
35 if let pulldown_cmark::TagEnd::Link = tag {
36 within_link = false;
37 }
38 events.push((range, MarkdownEvent::End(tag)));
39 }
40 pulldown_cmark::Event::Text(parsed) => {
41 // Automatically detect links in text if we're not already within a markdown
42 // link.
43 let mut parsed = parsed.as_ref();
44 if !within_link {
45 let mut finder = LinkFinder::new();
46 finder.kinds(&[linkify::LinkKind::Url]);
47 let text_range = range.clone();
48 for link in finder.links(&text[text_range.clone()]) {
49 let link_range =
50 text_range.start + link.start()..text_range.start + link.end();
51
52 if link_range.start > range.start {
53 let (text, tail) = parsed.split_at(link_range.start - range.start);
54 events.push((
55 range.start..link_range.start,
56 MarkdownEvent::Text(SharedString::new(text)),
57 ));
58 parsed = tail;
59 }
60
61 events.push((
62 link_range.clone(),
63 MarkdownEvent::Start(MarkdownTag::Link {
64 link_type: LinkType::Autolink,
65 dest_url: SharedString::from(link.as_str().to_string()),
66 title: SharedString::default(),
67 id: SharedString::default(),
68 }),
69 ));
70
71 let (link_text, tail) = parsed.split_at(link_range.end - link_range.start);
72 events.push((
73 link_range.clone(),
74 MarkdownEvent::Text(SharedString::new(link_text)),
75 ));
76 events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
77
78 range.start = link_range.end;
79 parsed = tail;
80 }
81 }
82 if range.start < range.end {
83 events.push((range, MarkdownEvent::Text(SharedString::new(parsed))));
84 }
85 }
86 pulldown_cmark::Event::Code(_) => {
87 range.start += 1;
88 range.end -= 1;
89 events.push((range, MarkdownEvent::Code))
90 }
91 pulldown_cmark::Event::Html(_) => events.push((range, MarkdownEvent::Html)),
92 pulldown_cmark::Event::InlineHtml(_) => events.push((range, MarkdownEvent::InlineHtml)),
93 pulldown_cmark::Event::FootnoteReference(_) => {
94 events.push((range, MarkdownEvent::FootnoteReference))
95 }
96 pulldown_cmark::Event::SoftBreak => events.push((range, MarkdownEvent::SoftBreak)),
97 pulldown_cmark::Event::HardBreak => events.push((range, MarkdownEvent::HardBreak)),
98 pulldown_cmark::Event::Rule => events.push((range, MarkdownEvent::Rule)),
99 pulldown_cmark::Event::TaskListMarker(checked) => {
100 events.push((range, MarkdownEvent::TaskListMarker(checked)))
101 }
102 pulldown_cmark::Event::InlineMath(_) | pulldown_cmark::Event::DisplayMath(_) => {}
103 }
104 }
105 events
106}
107
108pub fn parse_links_only(mut text: &str) -> Vec<(Range<usize>, MarkdownEvent)> {
109 let mut events = Vec::new();
110 let mut finder = LinkFinder::new();
111 finder.kinds(&[linkify::LinkKind::Url]);
112 let mut text_range = Range {
113 start: 0,
114 end: text.len(),
115 };
116 for link in finder.links(text) {
117 let link_range = link.start()..link.end();
118
119 if link_range.start > text_range.start {
120 let (head, tail) = text.split_at(link_range.start - text_range.start);
121 events.push((
122 text_range.start..link_range.start,
123 MarkdownEvent::Text(SharedString::new(head)),
124 ));
125 text = tail;
126 }
127
128 let (link_text, tail) = text.split_at(link_range.end - link_range.start);
129 events.push((
130 link_range.clone(),
131 MarkdownEvent::Start(MarkdownTag::Link {
132 link_type: LinkType::Autolink,
133 dest_url: SharedString::from(link.as_str().to_string()),
134 title: SharedString::default(),
135 id: SharedString::default(),
136 }),
137 ));
138 events.push((
139 link_range.clone(),
140 MarkdownEvent::Text(SharedString::new(link_text)),
141 ));
142 events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
143
144 text_range.start = link_range.end;
145 text = tail;
146 }
147
148 if text_range.end > text_range.start {
149 events.push((text_range, MarkdownEvent::Text(SharedString::new(text))));
150 }
151
152 events
153}
154
/// A static-lifetime equivalent of pulldown_cmark::Event so we can cache the
/// parse result for rendering without resorting to unsafe lifetime coercion.
///
/// The parsing functions in this file return each event paired with the byte
/// range of the source text it came from. Several variants carry no payload of
/// their own: the string that pulldown-cmark attaches to the corresponding
/// event is dropped during conversion and only the range is kept.
#[derive(Clone, Debug, PartialEq)]
pub enum MarkdownEvent {
    /// Start of a tagged element. Events that are yielded after this event
    /// and before its corresponding `End` event are inside this element.
    /// Start and end events are normally balanced.
    ///
    /// NOTE(review): `parse_markdown` forwards the `Start` of a metadata block
    /// but skips the matching `End`, so that one tag appears to be an
    /// exception. Confirm against consumers.
    Start(MarkdownTag),
    /// End of a tagged element.
    End(MarkdownTagEnd),
    /// A text node, holding the text to display.
    Text(SharedString),
    /// An inline code node. The code text is not stored in the event.
    Code,
    /// An HTML node. The HTML text is not stored in the event.
    Html,
    /// An inline HTML node. The HTML text is not stored in the event.
    InlineHtml,
    /// A reference to a footnote, which may or may not be defined
    /// by an event with a `Tag::FootnoteDefinition` tag. Definitions and references to them may
    /// occur in any order. The footnote label is not stored in the event.
    FootnoteReference,
    /// A soft line break.
    SoftBreak,
    /// A hard line break.
    HardBreak,
    /// A horizontal ruler.
    Rule,
    /// A task list marker, rendered as a checkbox in HTML. Contains `true` when it is checked.
    TaskListMarker(bool),
}
186
/// Tags for elements that can contain other elements.
///
/// This is the owned counterpart of `pulldown_cmark::Tag`: every borrowed
/// string of the original is stored as a `SharedString`.
#[derive(Clone, Debug, PartialEq)]
pub enum MarkdownTag {
    /// A paragraph of text and other inline elements.
    Paragraph,

    /// A heading, with optional identifier, classes and custom attributes.
    /// The identifier is prefixed with `#` and the last one in the attributes
    /// list is chosen, classes are prefixed with `.` and custom attributes
    /// have no prefix and can optionally have a value (`myattr` or `myattr=myvalue`).
    Heading {
        level: HeadingLevel,
        id: Option<SharedString>,
        classes: Vec<SharedString>,
        /// The first item of the tuple is the attr and second one the value.
        attrs: Vec<(SharedString, Option<SharedString>)>,
    },

    /// A block quote. The block-quote kind reported by pulldown-cmark is
    /// discarded during conversion.
    BlockQuote,

    /// A code block.
    CodeBlock(CodeBlockKind),

    /// A HTML block.
    HtmlBlock,

    /// A list. If the list is ordered the field indicates the number of the first item.
    /// Contains only list items.
    List(Option<u64>), // TODO: add delim and tight for ast (not needed for html)

    /// A list item.
    Item,

    /// A footnote definition. The value contained is the footnote's label by which it can
    /// be referred to.
    FootnoteDefinition(SharedString),

    /// A table. Contains a vector describing the text-alignment for each of its columns.
    Table(Vec<Alignment>),

    /// A table header. Contains only `TableCell`s. Note that the table body starts immediately
    /// after the closure of the `TableHead` tag. There is no `TableBody` tag.
    TableHead,

    /// A table row. Is used both for header rows as body rows. Contains only `TableCell`s.
    TableRow,
    /// A single cell of a table header or table row.
    TableCell,

    // span-level tags
    /// Emphasized text.
    Emphasis,
    /// Strongly emphasized text.
    Strong,
    /// Struck-through text.
    Strikethrough,

    /// A link.
    Link {
        link_type: LinkType,
        dest_url: SharedString,
        title: SharedString,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: SharedString,
    },

    /// An image, described by its link type, destination URL, title and
    /// link identifier.
    Image {
        link_type: LinkType,
        dest_url: SharedString,
        title: SharedString,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: SharedString,
    },

    /// A metadata block.
    MetadataBlock(MetadataBlockKind),

    /// A definition list. `parse_markdown` removes `ENABLE_DEFINITION_LIST`
    /// from its parser options.
    DefinitionList,
    /// The title (term) of a definition list entry.
    DefinitionListTitle,
    /// A definition belonging to a definition list title.
    DefinitionListDefinition,
}
266
/// The kind of a code block: the owned counterpart of
/// `pulldown_cmark::CodeBlockKind`.
#[derive(Clone, Debug, PartialEq)]
pub enum CodeBlockKind {
    /// A code block marked up by indentation, which carries no language information.
    Indented,
    /// The value contained in the tag describes the language of the code, which may be empty.
    Fenced(SharedString),
}
273
274impl From<pulldown_cmark::Tag<'_>> for MarkdownTag {
275 fn from(tag: pulldown_cmark::Tag) -> Self {
276 match tag {
277 pulldown_cmark::Tag::Paragraph => MarkdownTag::Paragraph,
278 pulldown_cmark::Tag::Heading {
279 level,
280 id,
281 classes,
282 attrs,
283 } => {
284 let id = id.map(|id| SharedString::from(id.into_string()));
285 let classes = classes
286 .into_iter()
287 .map(|c| SharedString::from(c.into_string()))
288 .collect();
289 let attrs = attrs
290 .into_iter()
291 .map(|(key, value)| {
292 (
293 SharedString::from(key.into_string()),
294 value.map(|v| SharedString::from(v.into_string())),
295 )
296 })
297 .collect();
298 MarkdownTag::Heading {
299 level,
300 id,
301 classes,
302 attrs,
303 }
304 }
305 pulldown_cmark::Tag::BlockQuote(_kind) => MarkdownTag::BlockQuote,
306 pulldown_cmark::Tag::CodeBlock(kind) => match kind {
307 pulldown_cmark::CodeBlockKind::Indented => {
308 MarkdownTag::CodeBlock(CodeBlockKind::Indented)
309 }
310 pulldown_cmark::CodeBlockKind::Fenced(info) => MarkdownTag::CodeBlock(
311 CodeBlockKind::Fenced(SharedString::from(info.into_string())),
312 ),
313 },
314 pulldown_cmark::Tag::List(start_number) => MarkdownTag::List(start_number),
315 pulldown_cmark::Tag::Item => MarkdownTag::Item,
316 pulldown_cmark::Tag::FootnoteDefinition(label) => {
317 MarkdownTag::FootnoteDefinition(SharedString::from(label.to_string()))
318 }
319 pulldown_cmark::Tag::Table(alignments) => MarkdownTag::Table(alignments),
320 pulldown_cmark::Tag::TableHead => MarkdownTag::TableHead,
321 pulldown_cmark::Tag::TableRow => MarkdownTag::TableRow,
322 pulldown_cmark::Tag::TableCell => MarkdownTag::TableCell,
323 pulldown_cmark::Tag::Emphasis => MarkdownTag::Emphasis,
324 pulldown_cmark::Tag::Strong => MarkdownTag::Strong,
325 pulldown_cmark::Tag::Strikethrough => MarkdownTag::Strikethrough,
326 pulldown_cmark::Tag::Link {
327 link_type,
328 dest_url,
329 title,
330 id,
331 } => MarkdownTag::Link {
332 link_type,
333 dest_url: SharedString::from(dest_url.into_string()),
334 title: SharedString::from(title.into_string()),
335 id: SharedString::from(id.into_string()),
336 },
337 pulldown_cmark::Tag::Image {
338 link_type,
339 dest_url,
340 title,
341 id,
342 } => MarkdownTag::Image {
343 link_type,
344 dest_url: SharedString::from(dest_url.into_string()),
345 title: SharedString::from(title.into_string()),
346 id: SharedString::from(id.into_string()),
347 },
348 pulldown_cmark::Tag::HtmlBlock => MarkdownTag::HtmlBlock,
349 pulldown_cmark::Tag::MetadataBlock(kind) => MarkdownTag::MetadataBlock(kind),
350 pulldown_cmark::Tag::DefinitionList => MarkdownTag::DefinitionList,
351 pulldown_cmark::Tag::DefinitionListTitle => MarkdownTag::DefinitionListTitle,
352 pulldown_cmark::Tag::DefinitionListDefinition => MarkdownTag::DefinitionListDefinition,
353 }
354 }
355}