1use gpui::SharedString;
2use linkify::LinkFinder;
3pub use pulldown_cmark::TagEnd as MarkdownTagEnd;
4use pulldown_cmark::{Alignment, HeadingLevel, LinkType, MetadataBlockKind, Options, Parser};
5use std::ops::Range;
6
7pub fn parse_markdown(text: &str) -> Vec<(Range<usize>, MarkdownEvent)> {
8 let mut options = Options::all();
9 options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
10
11 let mut events = Vec::new();
12 let mut within_link = false;
13 let mut within_metadata = false;
14 for (pulldown_event, mut range) in Parser::new_ext(text, options).into_offset_iter() {
15 if within_metadata {
16 if let pulldown_cmark::Event::End(pulldown_cmark::TagEnd::MetadataBlock { .. }) =
17 pulldown_event
18 {
19 within_metadata = false;
20 }
21 continue;
22 }
23 match pulldown_event {
24 pulldown_cmark::Event::Start(tag) => {
25 match tag {
26 pulldown_cmark::Tag::Link { .. } => within_link = true,
27 pulldown_cmark::Tag::MetadataBlock { .. } => within_metadata = true,
28 _ => {}
29 }
30 events.push((range, MarkdownEvent::Start(tag.into())))
31 }
32 pulldown_cmark::Event::End(tag) => {
33 if let pulldown_cmark::TagEnd::Link = tag {
34 within_link = false;
35 }
36 events.push((range, MarkdownEvent::End(tag)));
37 }
38 pulldown_cmark::Event::Text(_) => {
39 // Automatically detect links in text if we're not already within a markdown
40 // link.
41 if !within_link {
42 let mut finder = LinkFinder::new();
43 finder.kinds(&[linkify::LinkKind::Url]);
44 let text_range = range.clone();
45 for link in finder.links(&text[text_range.clone()]) {
46 let link_range =
47 text_range.start + link.start()..text_range.start + link.end();
48
49 if link_range.start > range.start {
50 events.push((range.start..link_range.start, MarkdownEvent::Text));
51 }
52
53 events.push((
54 link_range.clone(),
55 MarkdownEvent::Start(MarkdownTag::Link {
56 link_type: LinkType::Autolink,
57 dest_url: SharedString::from(link.as_str().to_string()),
58 title: SharedString::default(),
59 id: SharedString::default(),
60 }),
61 ));
62 events.push((link_range.clone(), MarkdownEvent::Text));
63 events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
64
65 range.start = link_range.end;
66 }
67 }
68
69 if range.start < range.end {
70 events.push((range, MarkdownEvent::Text));
71 }
72 }
73 pulldown_cmark::Event::Code(_) => {
74 range.start += 1;
75 range.end -= 1;
76 events.push((range, MarkdownEvent::Code))
77 }
78 pulldown_cmark::Event::Html(_) => events.push((range, MarkdownEvent::Html)),
79 pulldown_cmark::Event::InlineHtml(_) => events.push((range, MarkdownEvent::InlineHtml)),
80 pulldown_cmark::Event::FootnoteReference(_) => {
81 events.push((range, MarkdownEvent::FootnoteReference))
82 }
83 pulldown_cmark::Event::SoftBreak => events.push((range, MarkdownEvent::SoftBreak)),
84 pulldown_cmark::Event::HardBreak => events.push((range, MarkdownEvent::HardBreak)),
85 pulldown_cmark::Event::Rule => events.push((range, MarkdownEvent::Rule)),
86 pulldown_cmark::Event::TaskListMarker(checked) => {
87 events.push((range, MarkdownEvent::TaskListMarker(checked)))
88 }
89 pulldown_cmark::Event::InlineMath(_) | pulldown_cmark::Event::DisplayMath(_) => {}
90 }
91 }
92 events
93}
94
95pub fn parse_links_only(text: &str) -> Vec<(Range<usize>, MarkdownEvent)> {
96 let mut events = Vec::new();
97 let mut finder = LinkFinder::new();
98 finder.kinds(&[linkify::LinkKind::Url]);
99 let mut text_range = Range {
100 start: 0,
101 end: text.len(),
102 };
103 for link in finder.links(text) {
104 let link_range = link.start()..link.end();
105
106 if link_range.start > text_range.start {
107 events.push((text_range.start..link_range.start, MarkdownEvent::Text));
108 }
109
110 events.push((
111 link_range.clone(),
112 MarkdownEvent::Start(MarkdownTag::Link {
113 link_type: LinkType::Autolink,
114 dest_url: SharedString::from(link.as_str().to_string()),
115 title: SharedString::default(),
116 id: SharedString::default(),
117 }),
118 ));
119 events.push((link_range.clone(), MarkdownEvent::Text));
120 events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
121
122 text_range.start = link_range.end;
123 }
124
125 if text_range.end > text_range.start {
126 events.push((text_range, MarkdownEvent::Text));
127 }
128
129 events
130}
131
/// A static-lifetime equivalent of pulldown_cmark::Event so we can cache the
/// parse result for rendering without resorting to unsafe lifetime coercion.
///
/// Unlike `pulldown_cmark::Event`, the leaf variants (`Text`, `Code`, `Html`,
/// ...) carry no string payload. The parse functions in this file pair every
/// event with the byte range of the source text it covers, so the content can
/// be recovered by slicing the source with that range.
#[derive(Clone, Debug, PartialEq)]
pub enum MarkdownEvent {
    /// Start of a tagged element. Events that are yielded after this event
    /// and before its corresponding `End` event are inside this element.
    /// Start and end events are guaranteed to be balanced.
    ///
    /// NOTE(review): `parse_markdown` emits the `Start` of a metadata block but
    /// skips its `End`, so the guarantee above does not appear to hold for
    /// metadata blocks — confirm consumers account for this.
    Start(MarkdownTag),
    /// End of a tagged element.
    End(MarkdownTagEnd),
    /// A text node.
    Text,
    /// An inline code node.
    Code,
    /// An HTML node.
    Html,
    /// An inline HTML node.
    InlineHtml,
    /// A reference to a footnote with given label, which may or may not be defined
    /// by an event with a `Tag::FootnoteDefinition` tag. Definitions and references to them may
    /// occur in any order.
    FootnoteReference,
    /// A soft line break.
    SoftBreak,
    /// A hard line break.
    HardBreak,
    /// A horizontal ruler.
    Rule,
    /// A task list marker, rendered as a checkbox in HTML. Contains `true` when it is checked.
    TaskListMarker(bool),
}
163
/// Tags for elements that can contain other elements.
///
/// This mirrors `pulldown_cmark::Tag`, with every borrowed string replaced by
/// an owned `SharedString`.
#[derive(Clone, Debug, PartialEq)]
pub enum MarkdownTag {
    /// A paragraph of text and other inline elements.
    Paragraph,

    /// A heading, with optional identifier, classes and custom attributes.
    /// The identifier is prefixed with `#` and the last one in the attributes
    /// list is chosen, classes are prefixed with `.` and custom attributes
    /// have no prefix and can optionally have a value (`myattr` or `myattr=myvalue`).
    Heading {
        level: HeadingLevel,
        id: Option<SharedString>,
        classes: Vec<SharedString>,
        /// The first item of the tuple is the attr and second one the value.
        attrs: Vec<(SharedString, Option<SharedString>)>,
    },

    /// A block quote. The block quote kind reported by `pulldown_cmark` is not
    /// retained.
    BlockQuote,

    /// A code block.
    CodeBlock(CodeBlockKind),

    /// A HTML block.
    HtmlBlock,

    /// A list. If the list is ordered the field indicates the number of the first item.
    /// Contains only list items.
    List(Option<u64>), // TODO: add delim and tight for ast (not needed for html)

    /// A list item.
    Item,

    /// A footnote definition. The value contained is the footnote's label by which it can
    /// be referred to.
    FootnoteDefinition(SharedString),

    /// A table. Contains a vector describing the text-alignment for each of its columns.
    Table(Vec<Alignment>),

    /// A table header. Contains only `TableCell`s. Note that the table body starts immediately
    /// after the closure of the `TableHead` tag. There is no `TableBody` tag.
    TableHead,

    /// A table row. Is used both for header rows as body rows. Contains only `TableCell`s.
    TableRow,
    /// A single cell of a table row or table header.
    TableCell,

    // span-level tags
    /// Emphasized text.
    Emphasis,
    /// Strongly emphasized text.
    Strong,
    /// Struck-through text.
    Strikethrough,

    /// A link.
    Link {
        link_type: LinkType,
        dest_url: SharedString,
        title: SharedString,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: SharedString,
    },

    /// An image, described by its link type, destination URL, title and
    /// link identifier.
    Image {
        link_type: LinkType,
        dest_url: SharedString,
        title: SharedString,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: SharedString,
    },

    /// A metadata block.
    MetadataBlock(MetadataBlockKind),

    /// A definition list. `parse_markdown` disables the definition-list
    /// extension, so it does not produce this tag or the two below.
    DefinitionList,
    /// The title (term) of a definition list entry.
    DefinitionListTitle,
    /// The definition belonging to a definition list title.
    DefinitionListDefinition,
}
243
/// How a code block was written in the source: indented or fenced.
///
/// Owned counterpart of `pulldown_cmark::CodeBlockKind`.
#[derive(Clone, Debug, PartialEq)]
pub enum CodeBlockKind {
    /// A code block introduced by indentation; it carries no language information.
    Indented,
    /// The value contained in the tag describes the language of the code, which may be empty.
    Fenced(SharedString),
}
250
251impl From<pulldown_cmark::Tag<'_>> for MarkdownTag {
252 fn from(tag: pulldown_cmark::Tag) -> Self {
253 match tag {
254 pulldown_cmark::Tag::Paragraph => MarkdownTag::Paragraph,
255 pulldown_cmark::Tag::Heading {
256 level,
257 id,
258 classes,
259 attrs,
260 } => {
261 let id = id.map(|id| SharedString::from(id.into_string()));
262 let classes = classes
263 .into_iter()
264 .map(|c| SharedString::from(c.into_string()))
265 .collect();
266 let attrs = attrs
267 .into_iter()
268 .map(|(key, value)| {
269 (
270 SharedString::from(key.into_string()),
271 value.map(|v| SharedString::from(v.into_string())),
272 )
273 })
274 .collect();
275 MarkdownTag::Heading {
276 level,
277 id,
278 classes,
279 attrs,
280 }
281 }
282 pulldown_cmark::Tag::BlockQuote(_kind) => MarkdownTag::BlockQuote,
283 pulldown_cmark::Tag::CodeBlock(kind) => match kind {
284 pulldown_cmark::CodeBlockKind::Indented => {
285 MarkdownTag::CodeBlock(CodeBlockKind::Indented)
286 }
287 pulldown_cmark::CodeBlockKind::Fenced(info) => MarkdownTag::CodeBlock(
288 CodeBlockKind::Fenced(SharedString::from(info.into_string())),
289 ),
290 },
291 pulldown_cmark::Tag::List(start_number) => MarkdownTag::List(start_number),
292 pulldown_cmark::Tag::Item => MarkdownTag::Item,
293 pulldown_cmark::Tag::FootnoteDefinition(label) => {
294 MarkdownTag::FootnoteDefinition(SharedString::from(label.to_string()))
295 }
296 pulldown_cmark::Tag::Table(alignments) => MarkdownTag::Table(alignments),
297 pulldown_cmark::Tag::TableHead => MarkdownTag::TableHead,
298 pulldown_cmark::Tag::TableRow => MarkdownTag::TableRow,
299 pulldown_cmark::Tag::TableCell => MarkdownTag::TableCell,
300 pulldown_cmark::Tag::Emphasis => MarkdownTag::Emphasis,
301 pulldown_cmark::Tag::Strong => MarkdownTag::Strong,
302 pulldown_cmark::Tag::Strikethrough => MarkdownTag::Strikethrough,
303 pulldown_cmark::Tag::Link {
304 link_type,
305 dest_url,
306 title,
307 id,
308 } => MarkdownTag::Link {
309 link_type,
310 dest_url: SharedString::from(dest_url.into_string()),
311 title: SharedString::from(title.into_string()),
312 id: SharedString::from(id.into_string()),
313 },
314 pulldown_cmark::Tag::Image {
315 link_type,
316 dest_url,
317 title,
318 id,
319 } => MarkdownTag::Image {
320 link_type,
321 dest_url: SharedString::from(dest_url.into_string()),
322 title: SharedString::from(title.into_string()),
323 id: SharedString::from(id.into_string()),
324 },
325 pulldown_cmark::Tag::HtmlBlock => MarkdownTag::HtmlBlock,
326 pulldown_cmark::Tag::MetadataBlock(kind) => MarkdownTag::MetadataBlock(kind),
327 pulldown_cmark::Tag::DefinitionList => MarkdownTag::DefinitionList,
328 pulldown_cmark::Tag::DefinitionListTitle => MarkdownTag::DefinitionListTitle,
329 pulldown_cmark::Tag::DefinitionListDefinition => MarkdownTag::DefinitionListDefinition,
330 }
331 }
332}