1use gpui::SharedString;
2use linkify::LinkFinder;
3pub use pulldown_cmark::TagEnd as MarkdownTagEnd;
4use pulldown_cmark::{Alignment, HeadingLevel, LinkType, MetadataBlockKind, Options, Parser};
5use std::{collections::HashSet, ops::Range};
6
7pub fn parse_markdown(text: &str) -> (Vec<(Range<usize>, MarkdownEvent)>, HashSet<SharedString>) {
8 let mut options = Options::all();
9 options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
10 options.remove(pulldown_cmark::Options::ENABLE_YAML_STYLE_METADATA_BLOCKS);
11 options.remove(pulldown_cmark::Options::ENABLE_MATH);
12
13 let mut events = Vec::new();
14 let mut languages = HashSet::new();
15 let mut within_link = false;
16 let mut within_metadata = false;
17 for (pulldown_event, mut range) in Parser::new_ext(text, options).into_offset_iter() {
18 if within_metadata {
19 if let pulldown_cmark::Event::End(pulldown_cmark::TagEnd::MetadataBlock { .. }) =
20 pulldown_event
21 {
22 within_metadata = false;
23 }
24 continue;
25 }
26 match pulldown_event {
27 pulldown_cmark::Event::Start(tag) => {
28 match tag {
29 pulldown_cmark::Tag::Link { .. } => within_link = true,
30 pulldown_cmark::Tag::MetadataBlock { .. } => within_metadata = true,
31 pulldown_cmark::Tag::CodeBlock(pulldown_cmark::CodeBlockKind::Fenced(
32 ref language,
33 )) => {
34 languages.insert(SharedString::from(language.to_string()));
35 }
36 _ => {}
37 }
38 events.push((range, MarkdownEvent::Start(tag.into())))
39 }
40 pulldown_cmark::Event::End(tag) => {
41 if let pulldown_cmark::TagEnd::Link = tag {
42 within_link = false;
43 }
44 events.push((range, MarkdownEvent::End(tag)));
45 }
46 pulldown_cmark::Event::Text(parsed) => {
47 // Automatically detect links in text if we're not already within a markdown
48 // link.
49 let mut parsed = parsed.as_ref();
50 if !within_link {
51 let mut finder = LinkFinder::new();
52 finder.kinds(&[linkify::LinkKind::Url]);
53 let text_range = range.clone();
54 for link in finder.links(&text[text_range.clone()]) {
55 let link_range =
56 text_range.start + link.start()..text_range.start + link.end();
57
58 if link_range.start > range.start {
59 let (text, tail) = parsed.split_at(link_range.start - range.start);
60 events.push((
61 range.start..link_range.start,
62 MarkdownEvent::Text(SharedString::new(text)),
63 ));
64 parsed = tail;
65 }
66
67 events.push((
68 link_range.clone(),
69 MarkdownEvent::Start(MarkdownTag::Link {
70 link_type: LinkType::Autolink,
71 dest_url: SharedString::from(link.as_str().to_string()),
72 title: SharedString::default(),
73 id: SharedString::default(),
74 }),
75 ));
76
77 let (link_text, tail) = parsed.split_at(link_range.end - link_range.start);
78 events.push((
79 link_range.clone(),
80 MarkdownEvent::Text(SharedString::new(link_text)),
81 ));
82 events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
83
84 range.start = link_range.end;
85 parsed = tail;
86 }
87 }
88 if range.start < range.end {
89 events.push((range, MarkdownEvent::Text(SharedString::new(parsed))));
90 }
91 }
92 pulldown_cmark::Event::Code(_) => {
93 range.start += 1;
94 range.end -= 1;
95 events.push((range, MarkdownEvent::Code))
96 }
97 pulldown_cmark::Event::Html(_) => events.push((range, MarkdownEvent::Html)),
98 pulldown_cmark::Event::InlineHtml(_) => events.push((range, MarkdownEvent::InlineHtml)),
99 pulldown_cmark::Event::FootnoteReference(_) => {
100 events.push((range, MarkdownEvent::FootnoteReference))
101 }
102 pulldown_cmark::Event::SoftBreak => events.push((range, MarkdownEvent::SoftBreak)),
103 pulldown_cmark::Event::HardBreak => events.push((range, MarkdownEvent::HardBreak)),
104 pulldown_cmark::Event::Rule => events.push((range, MarkdownEvent::Rule)),
105 pulldown_cmark::Event::TaskListMarker(checked) => {
106 events.push((range, MarkdownEvent::TaskListMarker(checked)))
107 }
108 pulldown_cmark::Event::InlineMath(_) | pulldown_cmark::Event::DisplayMath(_) => {}
109 }
110 }
111 (events, languages)
112}
113
114pub fn parse_links_only(mut text: &str) -> Vec<(Range<usize>, MarkdownEvent)> {
115 let mut events = Vec::new();
116 let mut finder = LinkFinder::new();
117 finder.kinds(&[linkify::LinkKind::Url]);
118 let mut text_range = Range {
119 start: 0,
120 end: text.len(),
121 };
122 for link in finder.links(text) {
123 let link_range = link.start()..link.end();
124
125 if link_range.start > text_range.start {
126 let (head, tail) = text.split_at(link_range.start - text_range.start);
127 events.push((
128 text_range.start..link_range.start,
129 MarkdownEvent::Text(SharedString::new(head)),
130 ));
131 text = tail;
132 }
133
134 let (link_text, tail) = text.split_at(link_range.end - link_range.start);
135 events.push((
136 link_range.clone(),
137 MarkdownEvent::Start(MarkdownTag::Link {
138 link_type: LinkType::Autolink,
139 dest_url: SharedString::from(link.as_str().to_string()),
140 title: SharedString::default(),
141 id: SharedString::default(),
142 }),
143 ));
144 events.push((
145 link_range.clone(),
146 MarkdownEvent::Text(SharedString::new(link_text)),
147 ));
148 events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
149
150 text_range.start = link_range.end;
151 text = tail;
152 }
153
154 if text_range.end > text_range.start {
155 events.push((text_range, MarkdownEvent::Text(SharedString::new(text))));
156 }
157
158 events
159}
160
/// A static-lifetime equivalent of pulldown_cmark::Event so we can cache the
/// parse result for rendering without resorting to unsafe lifetime coercion.
///
/// Several variants (`Code`, `Html`, `InlineHtml`, `FootnoteReference`) carry no
/// payload of their own. The parsing functions in this file pair every event with a
/// byte range into the source text, which is presumably how consumers recover the
/// content of those events.
#[derive(Clone, Debug, PartialEq)]
pub enum MarkdownEvent {
    /// Start of a tagged element. Events that are yielded after this event
    /// and before its corresponding `End` event are inside this element.
    /// Start and end events are guaranteed to be balanced.
    ///
    /// NOTE(review): `parse_markdown` emits the `Start` of a metadata block but skips
    /// its `End`, so the balance guarantee does not appear to hold for that tag.
    Start(MarkdownTag),
    /// End of a tagged element.
    End(MarkdownTagEnd),
    /// A text node, holding the text itself.
    Text(SharedString),
    /// An inline code node.
    Code,
    /// An HTML node.
    Html,
    /// An inline HTML node.
    InlineHtml,
    /// A reference to a footnote with given label, which may or may not be defined
    /// by an event with a `Tag::FootnoteDefinition` tag. Definitions and references to them may
    /// occur in any order.
    FootnoteReference,
    /// A soft line break.
    SoftBreak,
    /// A hard line break.
    HardBreak,
    /// A horizontal ruler.
    Rule,
    /// A task list marker, rendered as a checkbox in HTML. Contains a true when it is checked.
    TaskListMarker(bool),
}
192
/// Tags for elements that can contain other elements.
///
/// This is the owned counterpart of `pulldown_cmark::Tag`: every borrowed string is
/// replaced by a `SharedString` (see the `From<pulldown_cmark::Tag>` impl).
#[derive(Clone, Debug, PartialEq)]
pub enum MarkdownTag {
    /// A paragraph of text and other inline elements.
    Paragraph,

    /// A heading, with optional identifier, classes and custom attributes.
    /// The identifier is prefixed with `#` and the last one in the attributes
    /// list is chosen, classes are prefixed with `.` and custom attributes
    /// have no prefix and can optionally have a value (`myattr` or `myattr=myvalue`).
    Heading {
        level: HeadingLevel,
        id: Option<SharedString>,
        classes: Vec<SharedString>,
        /// The first item of the tuple is the attr and second one the value.
        attrs: Vec<(SharedString, Option<SharedString>)>,
    },

    /// A block quote. The block quote kind reported by `pulldown_cmark` is not kept.
    BlockQuote,

    /// A code block.
    CodeBlock(CodeBlockKind),

    /// A HTML block.
    HtmlBlock,

    /// A list. If the list is ordered the field indicates the number of the first item.
    /// Contains only list items.
    List(Option<u64>), // TODO: add delim and tight for ast (not needed for html)

    /// A list item.
    Item,

    /// A footnote definition. The value contained is the footnote's label by which it can
    /// be referred to.
    FootnoteDefinition(SharedString),

    /// A table. Contains a vector describing the text-alignment for each of its columns.
    Table(Vec<Alignment>),

    /// A table header. Contains only `TableCell`s. Note that the table body starts immediately
    /// after the closure of the `TableHead` tag. There is no `TableBody` tag.
    TableHead,

    /// A table row. Is used both for header rows as body rows. Contains only `TableCell`s.
    TableRow,
    /// A single cell of a table row or table header.
    TableCell,

    // span-level tags
    /// Emphasized text.
    Emphasis,
    /// Strongly emphasized text.
    Strong,
    /// Struck-through text.
    Strikethrough,

    /// A link.
    Link {
        link_type: LinkType,
        dest_url: SharedString,
        title: SharedString,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: SharedString,
    },

    /// An image, described by its link type, destination URL, title and link identifier.
    Image {
        link_type: LinkType,
        dest_url: SharedString,
        title: SharedString,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: SharedString,
    },

    /// A metadata block.
    MetadataBlock(MetadataBlockKind),

    /// A definition list. `parse_markdown` disables the definition-list extension, so
    /// this and the two variants below are not expected from that function.
    DefinitionList,
    /// The title (term) of a definition list entry.
    DefinitionListTitle,
    /// The definition belonging to a definition list title.
    DefinitionListDefinition,
}
272
/// The flavour of a code block: the owned counterpart of
/// `pulldown_cmark::CodeBlockKind`.
#[derive(Clone, Debug, PartialEq)]
pub enum CodeBlockKind {
    /// A code block marked by indentation; it carries no info string.
    Indented,
    /// The value contained in the tag describes the language of the code, which may be empty.
    Fenced(SharedString),
}
279
280impl From<pulldown_cmark::Tag<'_>> for MarkdownTag {
281 fn from(tag: pulldown_cmark::Tag) -> Self {
282 match tag {
283 pulldown_cmark::Tag::Paragraph => MarkdownTag::Paragraph,
284 pulldown_cmark::Tag::Heading {
285 level,
286 id,
287 classes,
288 attrs,
289 } => {
290 let id = id.map(|id| SharedString::from(id.into_string()));
291 let classes = classes
292 .into_iter()
293 .map(|c| SharedString::from(c.into_string()))
294 .collect();
295 let attrs = attrs
296 .into_iter()
297 .map(|(key, value)| {
298 (
299 SharedString::from(key.into_string()),
300 value.map(|v| SharedString::from(v.into_string())),
301 )
302 })
303 .collect();
304 MarkdownTag::Heading {
305 level,
306 id,
307 classes,
308 attrs,
309 }
310 }
311 pulldown_cmark::Tag::BlockQuote(_kind) => MarkdownTag::BlockQuote,
312 pulldown_cmark::Tag::CodeBlock(kind) => match kind {
313 pulldown_cmark::CodeBlockKind::Indented => {
314 MarkdownTag::CodeBlock(CodeBlockKind::Indented)
315 }
316 pulldown_cmark::CodeBlockKind::Fenced(info) => MarkdownTag::CodeBlock(
317 CodeBlockKind::Fenced(SharedString::from(info.into_string())),
318 ),
319 },
320 pulldown_cmark::Tag::List(start_number) => MarkdownTag::List(start_number),
321 pulldown_cmark::Tag::Item => MarkdownTag::Item,
322 pulldown_cmark::Tag::FootnoteDefinition(label) => {
323 MarkdownTag::FootnoteDefinition(SharedString::from(label.to_string()))
324 }
325 pulldown_cmark::Tag::Table(alignments) => MarkdownTag::Table(alignments),
326 pulldown_cmark::Tag::TableHead => MarkdownTag::TableHead,
327 pulldown_cmark::Tag::TableRow => MarkdownTag::TableRow,
328 pulldown_cmark::Tag::TableCell => MarkdownTag::TableCell,
329 pulldown_cmark::Tag::Emphasis => MarkdownTag::Emphasis,
330 pulldown_cmark::Tag::Strong => MarkdownTag::Strong,
331 pulldown_cmark::Tag::Strikethrough => MarkdownTag::Strikethrough,
332 pulldown_cmark::Tag::Link {
333 link_type,
334 dest_url,
335 title,
336 id,
337 } => MarkdownTag::Link {
338 link_type,
339 dest_url: SharedString::from(dest_url.into_string()),
340 title: SharedString::from(title.into_string()),
341 id: SharedString::from(id.into_string()),
342 },
343 pulldown_cmark::Tag::Image {
344 link_type,
345 dest_url,
346 title,
347 id,
348 } => MarkdownTag::Image {
349 link_type,
350 dest_url: SharedString::from(dest_url.into_string()),
351 title: SharedString::from(title.into_string()),
352 id: SharedString::from(id.into_string()),
353 },
354 pulldown_cmark::Tag::HtmlBlock => MarkdownTag::HtmlBlock,
355 pulldown_cmark::Tag::MetadataBlock(kind) => MarkdownTag::MetadataBlock(kind),
356 pulldown_cmark::Tag::DefinitionList => MarkdownTag::DefinitionList,
357 pulldown_cmark::Tag::DefinitionListTitle => MarkdownTag::DefinitionListTitle,
358 pulldown_cmark::Tag::DefinitionListDefinition => MarkdownTag::DefinitionListDefinition,
359 }
360 }
361}