1use gpui::SharedString;
2use linkify::LinkFinder;
3pub use pulldown_cmark::TagEnd as MarkdownTagEnd;
4use pulldown_cmark::{Alignment, HeadingLevel, LinkType, MetadataBlockKind, Options, Parser};
5use std::ops::Range;
6
7pub fn parse_markdown(text: &str) -> Vec<(Range<usize>, MarkdownEvent)> {
8 let mut options = Options::all();
9 options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
10 options.remove(pulldown_cmark::Options::ENABLE_YAML_STYLE_METADATA_BLOCKS);
11
12 let mut events = Vec::new();
13 let mut within_link = false;
14 let mut within_metadata = false;
15 for (pulldown_event, mut range) in Parser::new_ext(text, options).into_offset_iter() {
16 if within_metadata {
17 if let pulldown_cmark::Event::End(pulldown_cmark::TagEnd::MetadataBlock { .. }) =
18 pulldown_event
19 {
20 within_metadata = false;
21 }
22 continue;
23 }
24 match pulldown_event {
25 pulldown_cmark::Event::Start(tag) => {
26 match tag {
27 pulldown_cmark::Tag::Link { .. } => within_link = true,
28 pulldown_cmark::Tag::MetadataBlock { .. } => within_metadata = true,
29 _ => {}
30 }
31 events.push((range, MarkdownEvent::Start(tag.into())))
32 }
33 pulldown_cmark::Event::End(tag) => {
34 if let pulldown_cmark::TagEnd::Link = tag {
35 within_link = false;
36 }
37 events.push((range, MarkdownEvent::End(tag)));
38 }
39 pulldown_cmark::Event::Text(_) => {
40 // Automatically detect links in text if we're not already within a markdown
41 // link.
42 if !within_link {
43 let mut finder = LinkFinder::new();
44 finder.kinds(&[linkify::LinkKind::Url]);
45 let text_range = range.clone();
46 for link in finder.links(&text[text_range.clone()]) {
47 let link_range =
48 text_range.start + link.start()..text_range.start + link.end();
49
50 if link_range.start > range.start {
51 events.push((range.start..link_range.start, MarkdownEvent::Text));
52 }
53
54 events.push((
55 link_range.clone(),
56 MarkdownEvent::Start(MarkdownTag::Link {
57 link_type: LinkType::Autolink,
58 dest_url: SharedString::from(link.as_str().to_string()),
59 title: SharedString::default(),
60 id: SharedString::default(),
61 }),
62 ));
63 events.push((link_range.clone(), MarkdownEvent::Text));
64 events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
65
66 range.start = link_range.end;
67 }
68 }
69
70 if range.start < range.end {
71 events.push((range, MarkdownEvent::Text));
72 }
73 }
74 pulldown_cmark::Event::Code(_) => {
75 range.start += 1;
76 range.end -= 1;
77 events.push((range, MarkdownEvent::Code))
78 }
79 pulldown_cmark::Event::Html(_) => events.push((range, MarkdownEvent::Html)),
80 pulldown_cmark::Event::InlineHtml(_) => events.push((range, MarkdownEvent::InlineHtml)),
81 pulldown_cmark::Event::FootnoteReference(_) => {
82 events.push((range, MarkdownEvent::FootnoteReference))
83 }
84 pulldown_cmark::Event::SoftBreak => events.push((range, MarkdownEvent::SoftBreak)),
85 pulldown_cmark::Event::HardBreak => events.push((range, MarkdownEvent::HardBreak)),
86 pulldown_cmark::Event::Rule => events.push((range, MarkdownEvent::Rule)),
87 pulldown_cmark::Event::TaskListMarker(checked) => {
88 events.push((range, MarkdownEvent::TaskListMarker(checked)))
89 }
90 pulldown_cmark::Event::InlineMath(_) | pulldown_cmark::Event::DisplayMath(_) => {}
91 }
92 }
93 events
94}
95
96pub fn parse_links_only(text: &str) -> Vec<(Range<usize>, MarkdownEvent)> {
97 let mut events = Vec::new();
98 let mut finder = LinkFinder::new();
99 finder.kinds(&[linkify::LinkKind::Url]);
100 let mut text_range = Range {
101 start: 0,
102 end: text.len(),
103 };
104 for link in finder.links(text) {
105 let link_range = link.start()..link.end();
106
107 if link_range.start > text_range.start {
108 events.push((text_range.start..link_range.start, MarkdownEvent::Text));
109 }
110
111 events.push((
112 link_range.clone(),
113 MarkdownEvent::Start(MarkdownTag::Link {
114 link_type: LinkType::Autolink,
115 dest_url: SharedString::from(link.as_str().to_string()),
116 title: SharedString::default(),
117 id: SharedString::default(),
118 }),
119 ));
120 events.push((link_range.clone(), MarkdownEvent::Text));
121 events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
122
123 text_range.start = link_range.end;
124 }
125
126 if text_range.end > text_range.start {
127 events.push((text_range, MarkdownEvent::Text));
128 }
129
130 events
131}
132
/// A static-lifetime equivalent of `pulldown_cmark::Event` so we can cache the
/// parse result for rendering without resorting to unsafe lifetime coercion.
///
/// Unlike the pulldown_cmark events, the leaf variants here carry no text. The parsing
/// functions in this file return each event together with the byte range of the source it
/// was produced from.
#[derive(Clone, Debug, PartialEq)]
pub enum MarkdownEvent {
    /// Start of a tagged element. Events that are yielded after this event
    /// and before its corresponding `End` event are inside this element.
    /// Start and end events are balanced, with one exception in `parse_markdown`:
    /// the `Start` of a metadata block is emitted while its `End` is skipped.
    Start(MarkdownTag),
    /// End of a tagged element.
    End(MarkdownTagEnd),
    /// A text node.
    Text,
    /// An inline code node.
    Code,
    /// An HTML node.
    Html,
    /// An inline HTML node.
    InlineHtml,
    /// A reference to a footnote, which may or may not be defined by an event with a
    /// `MarkdownTag::FootnoteDefinition` tag. Definitions and references to them may
    /// occur in any order. The label is not stored in the event.
    FootnoteReference,
    /// A soft line break.
    SoftBreak,
    /// A hard line break.
    HardBreak,
    /// A horizontal ruler.
    Rule,
    /// A task list marker, rendered as a checkbox in HTML. Holds `true` when it is checked.
    TaskListMarker(bool),
}
164
/// Tags for elements that can contain other elements.
///
/// This is the owned counterpart of `pulldown_cmark::Tag`: string payloads are stored as
/// `SharedString` instead of borrowing from the parsed source.
#[derive(Clone, Debug, PartialEq)]
pub enum MarkdownTag {
    /// A paragraph of text and other inline elements.
    Paragraph,

    /// A heading, with optional identifier, classes and custom attributes.
    /// The identifier is prefixed with `#` and the last one in the attributes
    /// list is chosen, classes are prefixed with `.` and custom attributes
    /// have no prefix and can optionally have a value (`myattr` or `myattr=myvalue`).
    Heading {
        /// The heading level.
        level: HeadingLevel,
        /// The heading identifier, if one was given.
        id: Option<SharedString>,
        /// The classes attached to the heading.
        classes: Vec<SharedString>,
        /// The first item of the tuple is the attr and second one the value.
        attrs: Vec<(SharedString, Option<SharedString>)>,
    },

    /// A block quote. The block quote kind reported by pulldown_cmark is not preserved.
    BlockQuote,

    /// A code block.
    CodeBlock(CodeBlockKind),

    /// A HTML block.
    HtmlBlock,

    /// A list. If the list is ordered the field indicates the number of the first item.
    /// Contains only list items.
    List(Option<u64>), // TODO: add delim and tight for ast (not needed for html)

    /// A list item.
    Item,

    /// A footnote definition. The value contained is the footnote's label by which it can
    /// be referred to.
    FootnoteDefinition(SharedString),

    /// A table. Contains a vector describing the text-alignment for each of its columns.
    Table(Vec<Alignment>),

    /// A table header. Contains only `TableCell`s. Note that the table body starts immediately
    /// after the closure of the `TableHead` tag. There is no `TableBody` tag.
    TableHead,

    /// A table row. Is used both for header rows as body rows. Contains only `TableCell`s.
    TableRow,
    /// A single cell of a table row or table header.
    TableCell,

    // span-level tags
    /// Emphasized text.
    Emphasis,
    /// Strongly emphasized text.
    Strong,
    /// Struck-through text.
    Strikethrough,

    /// A link.
    Link {
        /// The kind of link syntax that produced this link.
        link_type: LinkType,
        /// The destination URL of the link.
        dest_url: SharedString,
        /// The title of the link; empty for the autolinks synthesized in this file.
        title: SharedString,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: SharedString,
    },

    /// An image, described by its link type, destination URL, title and link identifier.
    Image {
        /// The kind of link syntax that produced this image.
        link_type: LinkType,
        /// The destination URL of the image.
        dest_url: SharedString,
        /// The title of the image.
        title: SharedString,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: SharedString,
    },

    /// A metadata block.
    MetadataBlock(MetadataBlockKind),

    /// A definition list. Note that `parse_markdown` disables the definition-list extension.
    DefinitionList,
    /// The title (term) of a definition list entry.
    DefinitionListTitle,
    /// The definition belonging to a definition list title.
    DefinitionListDefinition,
}
244
/// The flavor of a code block: the owned counterpart of `pulldown_cmark::CodeBlockKind`.
#[derive(Clone, Debug, PartialEq)]
pub enum CodeBlockKind {
    /// A code block introduced by indentation; it carries no info string.
    Indented,
    /// The value contained in the tag describes the language of the code, which may be empty.
    Fenced(SharedString),
}
251
252impl From<pulldown_cmark::Tag<'_>> for MarkdownTag {
253 fn from(tag: pulldown_cmark::Tag) -> Self {
254 match tag {
255 pulldown_cmark::Tag::Paragraph => MarkdownTag::Paragraph,
256 pulldown_cmark::Tag::Heading {
257 level,
258 id,
259 classes,
260 attrs,
261 } => {
262 let id = id.map(|id| SharedString::from(id.into_string()));
263 let classes = classes
264 .into_iter()
265 .map(|c| SharedString::from(c.into_string()))
266 .collect();
267 let attrs = attrs
268 .into_iter()
269 .map(|(key, value)| {
270 (
271 SharedString::from(key.into_string()),
272 value.map(|v| SharedString::from(v.into_string())),
273 )
274 })
275 .collect();
276 MarkdownTag::Heading {
277 level,
278 id,
279 classes,
280 attrs,
281 }
282 }
283 pulldown_cmark::Tag::BlockQuote(_kind) => MarkdownTag::BlockQuote,
284 pulldown_cmark::Tag::CodeBlock(kind) => match kind {
285 pulldown_cmark::CodeBlockKind::Indented => {
286 MarkdownTag::CodeBlock(CodeBlockKind::Indented)
287 }
288 pulldown_cmark::CodeBlockKind::Fenced(info) => MarkdownTag::CodeBlock(
289 CodeBlockKind::Fenced(SharedString::from(info.into_string())),
290 ),
291 },
292 pulldown_cmark::Tag::List(start_number) => MarkdownTag::List(start_number),
293 pulldown_cmark::Tag::Item => MarkdownTag::Item,
294 pulldown_cmark::Tag::FootnoteDefinition(label) => {
295 MarkdownTag::FootnoteDefinition(SharedString::from(label.to_string()))
296 }
297 pulldown_cmark::Tag::Table(alignments) => MarkdownTag::Table(alignments),
298 pulldown_cmark::Tag::TableHead => MarkdownTag::TableHead,
299 pulldown_cmark::Tag::TableRow => MarkdownTag::TableRow,
300 pulldown_cmark::Tag::TableCell => MarkdownTag::TableCell,
301 pulldown_cmark::Tag::Emphasis => MarkdownTag::Emphasis,
302 pulldown_cmark::Tag::Strong => MarkdownTag::Strong,
303 pulldown_cmark::Tag::Strikethrough => MarkdownTag::Strikethrough,
304 pulldown_cmark::Tag::Link {
305 link_type,
306 dest_url,
307 title,
308 id,
309 } => MarkdownTag::Link {
310 link_type,
311 dest_url: SharedString::from(dest_url.into_string()),
312 title: SharedString::from(title.into_string()),
313 id: SharedString::from(id.into_string()),
314 },
315 pulldown_cmark::Tag::Image {
316 link_type,
317 dest_url,
318 title,
319 id,
320 } => MarkdownTag::Image {
321 link_type,
322 dest_url: SharedString::from(dest_url.into_string()),
323 title: SharedString::from(title.into_string()),
324 id: SharedString::from(id.into_string()),
325 },
326 pulldown_cmark::Tag::HtmlBlock => MarkdownTag::HtmlBlock,
327 pulldown_cmark::Tag::MetadataBlock(kind) => MarkdownTag::MetadataBlock(kind),
328 pulldown_cmark::Tag::DefinitionList => MarkdownTag::DefinitionList,
329 pulldown_cmark::Tag::DefinitionListTitle => MarkdownTag::DefinitionListTitle,
330 pulldown_cmark::Tag::DefinitionListDefinition => MarkdownTag::DefinitionListDefinition,
331 }
332 }
333}