1use gpui::SharedString;
2use linkify::LinkFinder;
3pub use pulldown_cmark::TagEnd as MarkdownTagEnd;
4use pulldown_cmark::{Alignment, HeadingLevel, LinkType, MetadataBlockKind, Options, Parser};
5use std::ops::Range;
6
7pub fn parse_markdown(text: &str) -> Vec<(Range<usize>, MarkdownEvent)> {
8 let mut events = Vec::new();
9 let mut within_link = false;
10 let mut within_metadata = false;
11 for (pulldown_event, mut range) in Parser::new_ext(text, Options::all()).into_offset_iter() {
12 if within_metadata {
13 if let pulldown_cmark::Event::End(pulldown_cmark::TagEnd::MetadataBlock { .. }) =
14 pulldown_event
15 {
16 within_metadata = false;
17 }
18 continue;
19 }
20 match pulldown_event {
21 pulldown_cmark::Event::Start(tag) => {
22 match tag {
23 pulldown_cmark::Tag::Link { .. } => within_link = true,
24 pulldown_cmark::Tag::MetadataBlock { .. } => within_metadata = true,
25 _ => {}
26 }
27 events.push((range, MarkdownEvent::Start(tag.into())))
28 }
29 pulldown_cmark::Event::End(tag) => {
30 if let pulldown_cmark::TagEnd::Link = tag {
31 within_link = false;
32 }
33 events.push((range, MarkdownEvent::End(tag)));
34 }
35 pulldown_cmark::Event::Text(_) => {
36 // Automatically detect links in text if we're not already within a markdown
37 // link.
38 if !within_link {
39 let mut finder = LinkFinder::new();
40 finder.kinds(&[linkify::LinkKind::Url]);
41 let text_range = range.clone();
42 for link in finder.links(&text[text_range.clone()]) {
43 let link_range =
44 text_range.start + link.start()..text_range.start + link.end();
45
46 if link_range.start > range.start {
47 events.push((range.start..link_range.start, MarkdownEvent::Text));
48 }
49
50 events.push((
51 link_range.clone(),
52 MarkdownEvent::Start(MarkdownTag::Link {
53 link_type: LinkType::Autolink,
54 dest_url: SharedString::from(link.as_str().to_string()),
55 title: SharedString::default(),
56 id: SharedString::default(),
57 }),
58 ));
59 events.push((link_range.clone(), MarkdownEvent::Text));
60 events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
61
62 range.start = link_range.end;
63 }
64 }
65
66 if range.start < range.end {
67 events.push((range, MarkdownEvent::Text));
68 }
69 }
70 pulldown_cmark::Event::Code(_) => {
71 range.start += 1;
72 range.end -= 1;
73 events.push((range, MarkdownEvent::Code))
74 }
75 pulldown_cmark::Event::Html(_) => events.push((range, MarkdownEvent::Html)),
76 pulldown_cmark::Event::InlineHtml(_) => events.push((range, MarkdownEvent::InlineHtml)),
77 pulldown_cmark::Event::FootnoteReference(_) => {
78 events.push((range, MarkdownEvent::FootnoteReference))
79 }
80 pulldown_cmark::Event::SoftBreak => events.push((range, MarkdownEvent::SoftBreak)),
81 pulldown_cmark::Event::HardBreak => events.push((range, MarkdownEvent::HardBreak)),
82 pulldown_cmark::Event::Rule => events.push((range, MarkdownEvent::Rule)),
83 pulldown_cmark::Event::TaskListMarker(checked) => {
84 events.push((range, MarkdownEvent::TaskListMarker(checked)))
85 }
86 pulldown_cmark::Event::InlineMath(_) | pulldown_cmark::Event::DisplayMath(_) => {}
87 }
88 }
89 events
90}
91
92pub fn parse_links_only(text: &str) -> Vec<(Range<usize>, MarkdownEvent)> {
93 let mut events = Vec::new();
94 let mut finder = LinkFinder::new();
95 finder.kinds(&[linkify::LinkKind::Url]);
96 let mut text_range = Range {
97 start: 0,
98 end: text.len(),
99 };
100 for link in finder.links(&text) {
101 let link_range = link.start()..link.end();
102
103 if link_range.start > text_range.start {
104 events.push((text_range.start..link_range.start, MarkdownEvent::Text));
105 }
106
107 events.push((
108 link_range.clone(),
109 MarkdownEvent::Start(MarkdownTag::Link {
110 link_type: LinkType::Autolink,
111 dest_url: SharedString::from(link.as_str().to_string()),
112 title: SharedString::default(),
113 id: SharedString::default(),
114 }),
115 ));
116 events.push((link_range.clone(), MarkdownEvent::Text));
117 events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
118
119 text_range.start = link_range.end;
120 }
121
122 if text_range.end > text_range.start {
123 events.push((text_range, MarkdownEvent::Text));
124 }
125
126 events
127}
128
/// A static-lifetime equivalent of pulldown_cmark::Event so we can cache the
/// parse result for rendering without resorting to unsafe lifetime coercion.
///
/// Unlike `pulldown_cmark::Event`, the content-bearing variants (`Text`,
/// `Code`, `Html`, ...) hold no text. The parsing functions in this file pair
/// every event with a byte range into the source string instead.
#[derive(Clone, Debug, PartialEq)]
pub enum MarkdownEvent {
    /// Start of a tagged element. Events that are yielded after this event
    /// and before its corresponding `End` event are inside this element.
    ///
    /// NOTE(review): `parse_markdown` in this file skips the `End` event of
    /// metadata blocks, so a `Start(MarkdownTag::MetadataBlock(..))` is emitted
    /// without a matching `End`. Other start and end events are forwarded as
    /// produced by the parser.
    Start(MarkdownTag),
    /// End of a tagged element.
    End(MarkdownTagEnd),
    /// A text node.
    Text,
    /// An inline code node.
    Code,
    /// An HTML node.
    Html,
    /// An inline HTML node.
    InlineHtml,
    /// A reference to a footnote, which may or may not be defined by an event
    /// with a `MarkdownTag::FootnoteDefinition` tag. Definitions and references
    /// to them may occur in any order. The label is not stored in the event.
    FootnoteReference,
    /// A soft line break.
    SoftBreak,
    /// A hard line break.
    HardBreak,
    /// A horizontal ruler.
    Rule,
    /// A task list marker, rendered as a checkbox in HTML. Contains `true` when it is checked.
    TaskListMarker(bool),
}
160
/// Tags for elements that can contain other elements.
///
/// This is the owned counterpart of `pulldown_cmark::Tag`: borrowed strings
/// are stored as `SharedString`s.
#[derive(Clone, Debug, PartialEq)]
pub enum MarkdownTag {
    /// A paragraph of text and other inline elements.
    Paragraph,

    /// A heading, with optional identifier, classes and custom attributes.
    /// The identifier is prefixed with `#` and the last one in the attributes
    /// list is chosen, classes are prefixed with `.` and custom attributes
    /// have no prefix and can optionally have a value (`myattr` or `myattr=myvalue`).
    Heading {
        level: HeadingLevel,
        id: Option<SharedString>,
        classes: Vec<SharedString>,
        /// The first item of the tuple is the attr and second one the value.
        attrs: Vec<(SharedString, Option<SharedString>)>,
    },

    /// A block quote. The block quote kind reported by `pulldown_cmark` is not retained.
    BlockQuote,

    /// A code block.
    CodeBlock(CodeBlockKind),

    /// An HTML block.
    HtmlBlock,

    /// A list. If the list is ordered the field indicates the number of the first item.
    /// Contains only list items.
    List(Option<u64>), // TODO: add delim and tight for ast (not needed for html)

    /// A list item.
    Item,

    /// A footnote definition. The value contained is the footnote's label by which it can
    /// be referred to.
    FootnoteDefinition(SharedString),

    /// A table. Contains a vector describing the text-alignment for each of its columns.
    Table(Vec<Alignment>),

    /// A table header. Contains only `TableCell`s. Note that the table body starts immediately
    /// after the closure of the `TableHead` tag. There is no `TableBody` tag.
    TableHead,

    /// A table row. Is used both for header rows and body rows. Contains only `TableCell`s.
    TableRow,
    /// A single table cell.
    TableCell,

    // span-level tags
    /// Emphasized text.
    Emphasis,
    /// Strongly emphasized text.
    Strong,
    /// Struck-through text.
    Strikethrough,

    /// A link.
    Link {
        link_type: LinkType,
        dest_url: SharedString,
        title: SharedString,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: SharedString,
    },

    /// An image, described by its link type, destination URL, title and
    /// link identifier.
    Image {
        link_type: LinkType,
        dest_url: SharedString,
        title: SharedString,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: SharedString,
    },

    /// A metadata block.
    MetadataBlock(MetadataBlockKind),
}
236
/// The kind of a code block: the owned counterpart of
/// `pulldown_cmark::CodeBlockKind`, storing the info string as a `SharedString`.
#[derive(Clone, Debug, PartialEq)]
pub enum CodeBlockKind {
    /// An indented code block, which carries no language information.
    Indented,
    /// The value contained in the tag describes the language of the code, which may be empty.
    Fenced(SharedString),
}
243
244impl From<pulldown_cmark::Tag<'_>> for MarkdownTag {
245 fn from(tag: pulldown_cmark::Tag) -> Self {
246 match tag {
247 pulldown_cmark::Tag::Paragraph => MarkdownTag::Paragraph,
248 pulldown_cmark::Tag::Heading {
249 level,
250 id,
251 classes,
252 attrs,
253 } => {
254 let id = id.map(|id| SharedString::from(id.into_string()));
255 let classes = classes
256 .into_iter()
257 .map(|c| SharedString::from(c.into_string()))
258 .collect();
259 let attrs = attrs
260 .into_iter()
261 .map(|(key, value)| {
262 (
263 SharedString::from(key.into_string()),
264 value.map(|v| SharedString::from(v.into_string())),
265 )
266 })
267 .collect();
268 MarkdownTag::Heading {
269 level,
270 id,
271 classes,
272 attrs,
273 }
274 }
275 pulldown_cmark::Tag::BlockQuote(_kind) => MarkdownTag::BlockQuote,
276 pulldown_cmark::Tag::CodeBlock(kind) => match kind {
277 pulldown_cmark::CodeBlockKind::Indented => {
278 MarkdownTag::CodeBlock(CodeBlockKind::Indented)
279 }
280 pulldown_cmark::CodeBlockKind::Fenced(info) => MarkdownTag::CodeBlock(
281 CodeBlockKind::Fenced(SharedString::from(info.into_string())),
282 ),
283 },
284 pulldown_cmark::Tag::List(start_number) => MarkdownTag::List(start_number),
285 pulldown_cmark::Tag::Item => MarkdownTag::Item,
286 pulldown_cmark::Tag::FootnoteDefinition(label) => {
287 MarkdownTag::FootnoteDefinition(SharedString::from(label.to_string()))
288 }
289 pulldown_cmark::Tag::Table(alignments) => MarkdownTag::Table(alignments),
290 pulldown_cmark::Tag::TableHead => MarkdownTag::TableHead,
291 pulldown_cmark::Tag::TableRow => MarkdownTag::TableRow,
292 pulldown_cmark::Tag::TableCell => MarkdownTag::TableCell,
293 pulldown_cmark::Tag::Emphasis => MarkdownTag::Emphasis,
294 pulldown_cmark::Tag::Strong => MarkdownTag::Strong,
295 pulldown_cmark::Tag::Strikethrough => MarkdownTag::Strikethrough,
296 pulldown_cmark::Tag::Link {
297 link_type,
298 dest_url,
299 title,
300 id,
301 } => MarkdownTag::Link {
302 link_type,
303 dest_url: SharedString::from(dest_url.into_string()),
304 title: SharedString::from(title.into_string()),
305 id: SharedString::from(id.into_string()),
306 },
307 pulldown_cmark::Tag::Image {
308 link_type,
309 dest_url,
310 title,
311 id,
312 } => MarkdownTag::Image {
313 link_type,
314 dest_url: SharedString::from(dest_url.into_string()),
315 title: SharedString::from(title.into_string()),
316 id: SharedString::from(id.into_string()),
317 },
318 pulldown_cmark::Tag::HtmlBlock => MarkdownTag::HtmlBlock,
319 pulldown_cmark::Tag::MetadataBlock(kind) => MarkdownTag::MetadataBlock(kind),
320 pulldown_cmark::Tag::DefinitionList
321 | pulldown_cmark::Tag::DefinitionListTitle
322 | pulldown_cmark::Tag::DefinitionListDefinition => {
323 unimplemented!("definition lists are not yet supported")
324 }
325 }
326 }
327}