1use gpui::SharedString;
2use linkify::LinkFinder;
3pub use pulldown_cmark::TagEnd as MarkdownTagEnd;
4use pulldown_cmark::{Alignment, HeadingLevel, LinkType, MetadataBlockKind, Options, Parser};
5use std::ops::Range;
6
7pub fn parse_markdown(text: &str) -> Vec<(Range<usize>, MarkdownEvent)> {
8 let mut events = Vec::new();
9 let mut within_link = false;
10 let mut within_metadata = false;
11 for (pulldown_event, mut range) in Parser::new_ext(text, Options::all()).into_offset_iter() {
12 if within_metadata {
13 if let pulldown_cmark::Event::End(pulldown_cmark::TagEnd::MetadataBlock { .. }) =
14 pulldown_event
15 {
16 within_metadata = false;
17 }
18 continue;
19 }
20 match pulldown_event {
21 pulldown_cmark::Event::Start(tag) => {
22 match tag {
23 pulldown_cmark::Tag::Link { .. } => within_link = true,
24 pulldown_cmark::Tag::MetadataBlock { .. } => within_metadata = true,
25 _ => {}
26 }
27 events.push((range, MarkdownEvent::Start(tag.into())))
28 }
29 pulldown_cmark::Event::End(tag) => {
30 if let pulldown_cmark::TagEnd::Link = tag {
31 within_link = false;
32 }
33 events.push((range, MarkdownEvent::End(tag)));
34 }
35 pulldown_cmark::Event::Text(_) => {
36 // Automatically detect links in text if we're not already within a markdown
37 // link.
38 if !within_link {
39 let mut finder = LinkFinder::new();
40 finder.kinds(&[linkify::LinkKind::Url]);
41 let text_range = range.clone();
42 for link in finder.links(&text[text_range.clone()]) {
43 let link_range =
44 text_range.start + link.start()..text_range.start + link.end();
45
46 if link_range.start > range.start {
47 events.push((range.start..link_range.start, MarkdownEvent::Text));
48 }
49
50 events.push((
51 link_range.clone(),
52 MarkdownEvent::Start(MarkdownTag::Link {
53 link_type: LinkType::Autolink,
54 dest_url: SharedString::from(link.as_str().to_string()),
55 title: SharedString::default(),
56 id: SharedString::default(),
57 }),
58 ));
59 events.push((link_range.clone(), MarkdownEvent::Text));
60 events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
61
62 range.start = link_range.end;
63 }
64 }
65
66 if range.start < range.end {
67 events.push((range, MarkdownEvent::Text));
68 }
69 }
70 pulldown_cmark::Event::Code(_) => {
71 range.start += 1;
72 range.end -= 1;
73 events.push((range, MarkdownEvent::Code))
74 }
75 pulldown_cmark::Event::Html(_) => events.push((range, MarkdownEvent::Html)),
76 pulldown_cmark::Event::InlineHtml(_) => events.push((range, MarkdownEvent::InlineHtml)),
77 pulldown_cmark::Event::FootnoteReference(_) => {
78 events.push((range, MarkdownEvent::FootnoteReference))
79 }
80 pulldown_cmark::Event::SoftBreak => events.push((range, MarkdownEvent::SoftBreak)),
81 pulldown_cmark::Event::HardBreak => events.push((range, MarkdownEvent::HardBreak)),
82 pulldown_cmark::Event::Rule => events.push((range, MarkdownEvent::Rule)),
83 pulldown_cmark::Event::TaskListMarker(checked) => {
84 events.push((range, MarkdownEvent::TaskListMarker(checked)))
85 }
86 }
87 }
88 events
89}
90
91pub fn parse_links_only(text: &str) -> Vec<(Range<usize>, MarkdownEvent)> {
92 let mut events = Vec::new();
93 let mut finder = LinkFinder::new();
94 finder.kinds(&[linkify::LinkKind::Url]);
95 let mut text_range = Range {
96 start: 0,
97 end: text.len(),
98 };
99 for link in finder.links(&text) {
100 let link_range = link.start()..link.end();
101
102 if link_range.start > text_range.start {
103 events.push((text_range.start..link_range.start, MarkdownEvent::Text));
104 }
105
106 events.push((
107 link_range.clone(),
108 MarkdownEvent::Start(MarkdownTag::Link {
109 link_type: LinkType::Autolink,
110 dest_url: SharedString::from(link.as_str().to_string()),
111 title: SharedString::default(),
112 id: SharedString::default(),
113 }),
114 ));
115 events.push((link_range.clone(), MarkdownEvent::Text));
116 events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
117
118 text_range.start = link_range.end;
119 }
120
121 if text_range.end > text_range.start {
122 events.push((text_range, MarkdownEvent::Text));
123 }
124
125 events
126}
127
/// A static-lifetime equivalent of `pulldown_cmark::Event` so we can cache the
/// parse result for rendering without resorting to unsafe lifetime coercion.
///
/// Variants such as `Text` and `Code` carry no payload: the parsing functions
/// in this file return each event paired with a byte range into the source
/// text, and that range identifies the content.
#[derive(Clone, Debug, PartialEq)]
pub enum MarkdownEvent {
    /// Start of a tagged element. Events that are yielded after this event
    /// and before its corresponding `End` event are inside this element.
    /// Start and end events are balanced, with one exception visible in
    /// `parse_markdown`: for a metadata block only the `Start` event is
    /// pushed; the block's contents and its `End` event are skipped.
    Start(MarkdownTag),
    /// End of a tagged element.
    End(MarkdownTagEnd),
    /// A text node.
    Text,
    /// An inline code node.
    Code,
    /// An HTML node.
    Html,
    /// An inline HTML node.
    InlineHtml,
    /// A reference to a footnote, which may or may not be defined by an event
    /// with a `MarkdownTag::FootnoteDefinition` tag. Definitions and
    /// references to them may occur in any order.
    FootnoteReference,
    /// A soft line break.
    SoftBreak,
    /// A hard line break.
    HardBreak,
    /// A horizontal ruler.
    Rule,
    /// A task list marker, rendered as a checkbox in HTML. Contains `true`
    /// when it is checked.
    TaskListMarker(bool),
}
159
/// Tags for elements that can contain other elements.
///
/// This is the owned counterpart of `pulldown_cmark::Tag`: every borrowed
/// string of the parser's tag is stored as a `SharedString` here.
#[derive(Clone, Debug, PartialEq)]
pub enum MarkdownTag {
    /// A paragraph of text and other inline elements.
    Paragraph,

    /// A heading, with optional identifier, classes and custom attributes.
    /// The identifier is prefixed with `#` and the last one in the attributes
    /// list is chosen, classes are prefixed with `.` and custom attributes
    /// have no prefix and can optionally have a value (`myattr` or `myattr=myvalue`).
    Heading {
        level: HeadingLevel,
        id: Option<SharedString>,
        classes: Vec<SharedString>,
        /// The first item of the tuple is the attribute name and the second
        /// one its optional value.
        attrs: Vec<(SharedString, Option<SharedString>)>,
    },

    /// A block quote.
    BlockQuote,

    /// A code block.
    CodeBlock(CodeBlockKind),

    /// An HTML block.
    HtmlBlock,

    /// A list. If the list is ordered the field indicates the number of the first item.
    /// Contains only list items.
    List(Option<u64>), // TODO: add delim and tight for ast (not needed for html)

    /// A list item.
    Item,

    /// A footnote definition. The value contained is the footnote's label by which it can
    /// be referred to.
    FootnoteDefinition(SharedString),

    /// A table. Contains a vector describing the text-alignment for each of its columns.
    Table(Vec<Alignment>),

    /// A table header. Contains only `TableCell`s. Note that the table body starts immediately
    /// after the closure of the `TableHead` tag. There is no `TableBody` tag.
    TableHead,

    /// A table row. Used for both header rows and body rows. Contains only `TableCell`s.
    TableRow,
    /// A table cell.
    TableCell,

    // span-level tags
    /// Emphasized text.
    Emphasis,
    /// Strongly emphasized text.
    Strong,
    /// Struck-through text.
    Strikethrough,

    /// A link.
    Link {
        link_type: LinkType,
        /// The destination URL of the link.
        dest_url: SharedString,
        /// The title of the link; empty for the autolinks synthesized in this file.
        title: SharedString,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: SharedString,
    },

    /// An image. The fields are, in order: the link type, the destination URL,
    /// the title, and the link identifier.
    Image {
        link_type: LinkType,
        /// The destination URL of the image.
        dest_url: SharedString,
        /// The title of the image.
        title: SharedString,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: SharedString,
    },

    /// A metadata block.
    MetadataBlock(MetadataBlockKind),
}
235
/// The kind of a code block, mirroring `pulldown_cmark::CodeBlockKind` with
/// the fence info stored as an owned `SharedString`.
#[derive(Clone, Debug, PartialEq)]
pub enum CodeBlockKind {
    /// An indented code block; carries no language information.
    Indented,
    /// The value contained in the tag describes the language of the code, which may be empty.
    Fenced(SharedString),
}
242
243impl From<pulldown_cmark::Tag<'_>> for MarkdownTag {
244 fn from(tag: pulldown_cmark::Tag) -> Self {
245 match tag {
246 pulldown_cmark::Tag::Paragraph => MarkdownTag::Paragraph,
247 pulldown_cmark::Tag::Heading {
248 level,
249 id,
250 classes,
251 attrs,
252 } => {
253 let id = id.map(|id| SharedString::from(id.into_string()));
254 let classes = classes
255 .into_iter()
256 .map(|c| SharedString::from(c.into_string()))
257 .collect();
258 let attrs = attrs
259 .into_iter()
260 .map(|(key, value)| {
261 (
262 SharedString::from(key.into_string()),
263 value.map(|v| SharedString::from(v.into_string())),
264 )
265 })
266 .collect();
267 MarkdownTag::Heading {
268 level,
269 id,
270 classes,
271 attrs,
272 }
273 }
274 pulldown_cmark::Tag::BlockQuote => MarkdownTag::BlockQuote,
275 pulldown_cmark::Tag::CodeBlock(kind) => match kind {
276 pulldown_cmark::CodeBlockKind::Indented => {
277 MarkdownTag::CodeBlock(CodeBlockKind::Indented)
278 }
279 pulldown_cmark::CodeBlockKind::Fenced(info) => MarkdownTag::CodeBlock(
280 CodeBlockKind::Fenced(SharedString::from(info.into_string())),
281 ),
282 },
283 pulldown_cmark::Tag::List(start_number) => MarkdownTag::List(start_number),
284 pulldown_cmark::Tag::Item => MarkdownTag::Item,
285 pulldown_cmark::Tag::FootnoteDefinition(label) => {
286 MarkdownTag::FootnoteDefinition(SharedString::from(label.to_string()))
287 }
288 pulldown_cmark::Tag::Table(alignments) => MarkdownTag::Table(alignments),
289 pulldown_cmark::Tag::TableHead => MarkdownTag::TableHead,
290 pulldown_cmark::Tag::TableRow => MarkdownTag::TableRow,
291 pulldown_cmark::Tag::TableCell => MarkdownTag::TableCell,
292 pulldown_cmark::Tag::Emphasis => MarkdownTag::Emphasis,
293 pulldown_cmark::Tag::Strong => MarkdownTag::Strong,
294 pulldown_cmark::Tag::Strikethrough => MarkdownTag::Strikethrough,
295 pulldown_cmark::Tag::Link {
296 link_type,
297 dest_url,
298 title,
299 id,
300 } => MarkdownTag::Link {
301 link_type,
302 dest_url: SharedString::from(dest_url.into_string()),
303 title: SharedString::from(title.into_string()),
304 id: SharedString::from(id.into_string()),
305 },
306 pulldown_cmark::Tag::Image {
307 link_type,
308 dest_url,
309 title,
310 id,
311 } => MarkdownTag::Image {
312 link_type,
313 dest_url: SharedString::from(dest_url.into_string()),
314 title: SharedString::from(title.into_string()),
315 id: SharedString::from(id.into_string()),
316 },
317 pulldown_cmark::Tag::HtmlBlock => MarkdownTag::HtmlBlock,
318 pulldown_cmark::Tag::MetadataBlock(kind) => MarkdownTag::MetadataBlock(kind),
319 }
320 }
321}