1use gpui::SharedString;
2use linkify::LinkFinder;
3pub use pulldown_cmark::TagEnd as MarkdownTagEnd;
4use pulldown_cmark::{Alignment, HeadingLevel, LinkType, MetadataBlockKind, Options, Parser};
5use std::ops::Range;
6
7pub fn parse_markdown(text: &str) -> Vec<(Range<usize>, MarkdownEvent)> {
8 let mut options = Options::all();
9 options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
10 options.remove(pulldown_cmark::Options::ENABLE_YAML_STYLE_METADATA_BLOCKS);
11 options.remove(pulldown_cmark::Options::ENABLE_MATH);
12
13 let mut events = Vec::new();
14 let mut within_link = false;
15 let mut within_metadata = false;
16 for (pulldown_event, mut range) in Parser::new_ext(text, options).into_offset_iter() {
17 if within_metadata {
18 if let pulldown_cmark::Event::End(pulldown_cmark::TagEnd::MetadataBlock { .. }) =
19 pulldown_event
20 {
21 within_metadata = false;
22 }
23 continue;
24 }
25 match pulldown_event {
26 pulldown_cmark::Event::Start(tag) => {
27 match tag {
28 pulldown_cmark::Tag::Link { .. } => within_link = true,
29 pulldown_cmark::Tag::MetadataBlock { .. } => within_metadata = true,
30 _ => {}
31 }
32 events.push((range, MarkdownEvent::Start(tag.into())))
33 }
34 pulldown_cmark::Event::End(tag) => {
35 if let pulldown_cmark::TagEnd::Link = tag {
36 within_link = false;
37 }
38 events.push((range, MarkdownEvent::End(tag)));
39 }
40 pulldown_cmark::Event::Text(_) => {
41 // Automatically detect links in text if we're not already within a markdown
42 // link.
43 if !within_link {
44 let mut finder = LinkFinder::new();
45 finder.kinds(&[linkify::LinkKind::Url]);
46 let text_range = range.clone();
47 for link in finder.links(&text[text_range.clone()]) {
48 let link_range =
49 text_range.start + link.start()..text_range.start + link.end();
50
51 if link_range.start > range.start {
52 events.push((range.start..link_range.start, MarkdownEvent::Text));
53 }
54
55 events.push((
56 link_range.clone(),
57 MarkdownEvent::Start(MarkdownTag::Link {
58 link_type: LinkType::Autolink,
59 dest_url: SharedString::from(link.as_str().to_string()),
60 title: SharedString::default(),
61 id: SharedString::default(),
62 }),
63 ));
64 events.push((link_range.clone(), MarkdownEvent::Text));
65 events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
66
67 range.start = link_range.end;
68 }
69 }
70
71 if range.start < range.end {
72 events.push((range, MarkdownEvent::Text));
73 }
74 }
75 pulldown_cmark::Event::Code(_) => {
76 range.start += 1;
77 range.end -= 1;
78 events.push((range, MarkdownEvent::Code))
79 }
80 pulldown_cmark::Event::Html(_) => events.push((range, MarkdownEvent::Html)),
81 pulldown_cmark::Event::InlineHtml(_) => events.push((range, MarkdownEvent::InlineHtml)),
82 pulldown_cmark::Event::FootnoteReference(_) => {
83 events.push((range, MarkdownEvent::FootnoteReference))
84 }
85 pulldown_cmark::Event::SoftBreak => events.push((range, MarkdownEvent::SoftBreak)),
86 pulldown_cmark::Event::HardBreak => events.push((range, MarkdownEvent::HardBreak)),
87 pulldown_cmark::Event::Rule => events.push((range, MarkdownEvent::Rule)),
88 pulldown_cmark::Event::TaskListMarker(checked) => {
89 events.push((range, MarkdownEvent::TaskListMarker(checked)))
90 }
91 pulldown_cmark::Event::InlineMath(_) | pulldown_cmark::Event::DisplayMath(_) => {}
92 }
93 }
94 events
95}
96
97pub fn parse_links_only(text: &str) -> Vec<(Range<usize>, MarkdownEvent)> {
98 let mut events = Vec::new();
99 let mut finder = LinkFinder::new();
100 finder.kinds(&[linkify::LinkKind::Url]);
101 let mut text_range = Range {
102 start: 0,
103 end: text.len(),
104 };
105 for link in finder.links(text) {
106 let link_range = link.start()..link.end();
107
108 if link_range.start > text_range.start {
109 events.push((text_range.start..link_range.start, MarkdownEvent::Text));
110 }
111
112 events.push((
113 link_range.clone(),
114 MarkdownEvent::Start(MarkdownTag::Link {
115 link_type: LinkType::Autolink,
116 dest_url: SharedString::from(link.as_str().to_string()),
117 title: SharedString::default(),
118 id: SharedString::default(),
119 }),
120 ));
121 events.push((link_range.clone(), MarkdownEvent::Text));
122 events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
123
124 text_range.start = link_range.end;
125 }
126
127 if text_range.end > text_range.start {
128 events.push((text_range, MarkdownEvent::Text));
129 }
130
131 events
132}
133
/// A static-lifetime equivalent of pulldown_cmark::Event so we can cache the
/// parse result for rendering without resorting to unsafe lifetime coercion.
///
/// Content-bearing variants such as `Text` and `Code` carry no payload of their own; the
/// parse functions in this file pair each event with a byte range into the source text.
#[derive(Clone, Debug, PartialEq)]
pub enum MarkdownEvent {
    /// Start of a tagged element. Events that are yielded after this event
    /// and before its corresponding `End` event are inside this element.
    /// Start and end events are normally balanced.
    /// NOTE(review): `parse_markdown` emits the `Start` of a metadata block but drops its
    /// `End`, so that tag appears to be an exception — confirm consumers tolerate it.
    Start(MarkdownTag),
    /// End of a tagged element.
    End(MarkdownTagEnd),
    /// A text node.
    Text,
    /// An inline code node.
    Code,
    /// An HTML node.
    Html,
    /// An inline HTML node.
    InlineHtml,
    /// A reference to a footnote, which may or may not be defined by an event with a
    /// `MarkdownTag::FootnoteDefinition` tag. Definitions and references to them may
    /// occur in any order.
    FootnoteReference,
    /// A soft line break.
    SoftBreak,
    /// A hard line break.
    HardBreak,
    /// A horizontal ruler.
    Rule,
    /// A task list marker, rendered as a checkbox in HTML. Contains `true` when it is checked.
    TaskListMarker(bool),
}
165
/// Tags for elements that can contain other elements.
///
/// This is the owned counterpart of `pulldown_cmark::Tag`, with borrowed strings replaced
/// by `SharedString`.
#[derive(Clone, Debug, PartialEq)]
pub enum MarkdownTag {
    /// A paragraph of text and other inline elements.
    Paragraph,

    /// A heading, with optional identifier, classes and custom attributes.
    /// The identifier is prefixed with `#` and the last one in the attributes
    /// list is chosen, classes are prefixed with `.` and custom attributes
    /// have no prefix and can optionally have a value (`myattr` or `myattr=myvalue`).
    Heading {
        level: HeadingLevel,
        id: Option<SharedString>,
        classes: Vec<SharedString>,
        /// The first item of the tuple is the attr and second one the value.
        attrs: Vec<(SharedString, Option<SharedString>)>,
    },

    /// A block quote. The block quote kind reported by pulldown-cmark is not retained.
    BlockQuote,

    /// A code block.
    CodeBlock(CodeBlockKind),

    /// A HTML block.
    HtmlBlock,

    /// A list. If the list is ordered the field indicates the number of the first item.
    /// Contains only list items.
    List(Option<u64>), // TODO: add delim and tight for ast (not needed for html)

    /// A list item.
    Item,

    /// A footnote definition. The value contained is the footnote's label by which it can
    /// be referred to.
    FootnoteDefinition(SharedString),

    /// A table. Contains a vector describing the text-alignment for each of its columns.
    Table(Vec<Alignment>),

    /// A table header. Contains only `TableCell`s. Note that the table body starts immediately
    /// after the closure of the `TableHead` tag. There is no `TableBody` tag.
    TableHead,

    /// A table row. Is used both for header rows as body rows. Contains only `TableCell`s.
    TableRow,
    /// A single cell of a table row or table header.
    TableCell,

    // span-level tags
    /// Emphasized text.
    Emphasis,
    /// Strongly emphasized text.
    Strong,
    /// Struck-through text.
    Strikethrough,

    /// A link.
    Link {
        link_type: LinkType,
        dest_url: SharedString,
        title: SharedString,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: SharedString,
    },

    /// An image, described by its link type, destination URL, title and link identifier.
    Image {
        link_type: LinkType,
        dest_url: SharedString,
        title: SharedString,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: SharedString,
    },

    /// A metadata block.
    MetadataBlock(MetadataBlockKind),

    /// A definition list.
    DefinitionList,
    /// The title (term) of a definition list entry.
    DefinitionListTitle,
    /// The definition belonging to a definition list title.
    DefinitionListDefinition,
}
245
/// How a code block was written in the source: indented or fenced.
#[derive(Clone, Debug, PartialEq)]
pub enum CodeBlockKind {
    /// An indented code block, which carries no language information.
    Indented,
    /// The value contained in the tag describes the language of the code, which may be empty.
    Fenced(SharedString),
}
252
253impl From<pulldown_cmark::Tag<'_>> for MarkdownTag {
254 fn from(tag: pulldown_cmark::Tag) -> Self {
255 match tag {
256 pulldown_cmark::Tag::Paragraph => MarkdownTag::Paragraph,
257 pulldown_cmark::Tag::Heading {
258 level,
259 id,
260 classes,
261 attrs,
262 } => {
263 let id = id.map(|id| SharedString::from(id.into_string()));
264 let classes = classes
265 .into_iter()
266 .map(|c| SharedString::from(c.into_string()))
267 .collect();
268 let attrs = attrs
269 .into_iter()
270 .map(|(key, value)| {
271 (
272 SharedString::from(key.into_string()),
273 value.map(|v| SharedString::from(v.into_string())),
274 )
275 })
276 .collect();
277 MarkdownTag::Heading {
278 level,
279 id,
280 classes,
281 attrs,
282 }
283 }
284 pulldown_cmark::Tag::BlockQuote(_kind) => MarkdownTag::BlockQuote,
285 pulldown_cmark::Tag::CodeBlock(kind) => match kind {
286 pulldown_cmark::CodeBlockKind::Indented => {
287 MarkdownTag::CodeBlock(CodeBlockKind::Indented)
288 }
289 pulldown_cmark::CodeBlockKind::Fenced(info) => MarkdownTag::CodeBlock(
290 CodeBlockKind::Fenced(SharedString::from(info.into_string())),
291 ),
292 },
293 pulldown_cmark::Tag::List(start_number) => MarkdownTag::List(start_number),
294 pulldown_cmark::Tag::Item => MarkdownTag::Item,
295 pulldown_cmark::Tag::FootnoteDefinition(label) => {
296 MarkdownTag::FootnoteDefinition(SharedString::from(label.to_string()))
297 }
298 pulldown_cmark::Tag::Table(alignments) => MarkdownTag::Table(alignments),
299 pulldown_cmark::Tag::TableHead => MarkdownTag::TableHead,
300 pulldown_cmark::Tag::TableRow => MarkdownTag::TableRow,
301 pulldown_cmark::Tag::TableCell => MarkdownTag::TableCell,
302 pulldown_cmark::Tag::Emphasis => MarkdownTag::Emphasis,
303 pulldown_cmark::Tag::Strong => MarkdownTag::Strong,
304 pulldown_cmark::Tag::Strikethrough => MarkdownTag::Strikethrough,
305 pulldown_cmark::Tag::Link {
306 link_type,
307 dest_url,
308 title,
309 id,
310 } => MarkdownTag::Link {
311 link_type,
312 dest_url: SharedString::from(dest_url.into_string()),
313 title: SharedString::from(title.into_string()),
314 id: SharedString::from(id.into_string()),
315 },
316 pulldown_cmark::Tag::Image {
317 link_type,
318 dest_url,
319 title,
320 id,
321 } => MarkdownTag::Image {
322 link_type,
323 dest_url: SharedString::from(dest_url.into_string()),
324 title: SharedString::from(title.into_string()),
325 id: SharedString::from(id.into_string()),
326 },
327 pulldown_cmark::Tag::HtmlBlock => MarkdownTag::HtmlBlock,
328 pulldown_cmark::Tag::MetadataBlock(kind) => MarkdownTag::MetadataBlock(kind),
329 pulldown_cmark::Tag::DefinitionList => MarkdownTag::DefinitionList,
330 pulldown_cmark::Tag::DefinitionListTitle => MarkdownTag::DefinitionListTitle,
331 pulldown_cmark::Tag::DefinitionListDefinition => MarkdownTag::DefinitionListDefinition,
332 }
333 }
334}