1use gpui::SharedString;
2use linkify::LinkFinder;
3pub use pulldown_cmark::TagEnd as MarkdownTagEnd;
4use pulldown_cmark::{Alignment, HeadingLevel, LinkType, MetadataBlockKind, Options, Parser};
5use std::ops::Range;
6
7pub fn parse_markdown(text: &str) -> Vec<(Range<usize>, MarkdownEvent)> {
8 let mut events = Vec::new();
9 let mut within_link = false;
10 for (pulldown_event, mut range) in Parser::new_ext(text, Options::all()).into_offset_iter() {
11 match pulldown_event {
12 pulldown_cmark::Event::Start(tag) => {
13 if let pulldown_cmark::Tag::Link { .. } = tag {
14 within_link = true;
15 }
16 events.push((range, MarkdownEvent::Start(tag.into())))
17 }
18 pulldown_cmark::Event::End(tag) => {
19 if let pulldown_cmark::TagEnd::Link = tag {
20 within_link = false;
21 }
22 events.push((range, MarkdownEvent::End(tag)));
23 }
24 pulldown_cmark::Event::Text(_) => {
25 // Automatically detect links in text if we're not already within a markdown
26 // link.
27 if !within_link {
28 let mut finder = LinkFinder::new();
29 finder.kinds(&[linkify::LinkKind::Url]);
30 let text_range = range.clone();
31 for link in finder.links(&text[text_range.clone()]) {
32 let link_range =
33 text_range.start + link.start()..text_range.start + link.end();
34
35 if link_range.start > range.start {
36 events.push((range.start..link_range.start, MarkdownEvent::Text));
37 }
38
39 events.push((
40 link_range.clone(),
41 MarkdownEvent::Start(MarkdownTag::Link {
42 link_type: LinkType::Autolink,
43 dest_url: SharedString::from(link.as_str().to_string()),
44 title: SharedString::default(),
45 id: SharedString::default(),
46 }),
47 ));
48 events.push((link_range.clone(), MarkdownEvent::Text));
49 events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
50
51 range.start = link_range.end;
52 }
53 }
54
55 if range.start < range.end {
56 events.push((range, MarkdownEvent::Text));
57 }
58 }
59 pulldown_cmark::Event::Code(_) => {
60 range.start += 1;
61 range.end -= 1;
62 events.push((range, MarkdownEvent::Code))
63 }
64 pulldown_cmark::Event::Html(_) => events.push((range, MarkdownEvent::Html)),
65 pulldown_cmark::Event::InlineHtml(_) => events.push((range, MarkdownEvent::InlineHtml)),
66 pulldown_cmark::Event::FootnoteReference(_) => {
67 events.push((range, MarkdownEvent::FootnoteReference))
68 }
69 pulldown_cmark::Event::SoftBreak => events.push((range, MarkdownEvent::SoftBreak)),
70 pulldown_cmark::Event::HardBreak => events.push((range, MarkdownEvent::HardBreak)),
71 pulldown_cmark::Event::Rule => events.push((range, MarkdownEvent::Rule)),
72 pulldown_cmark::Event::TaskListMarker(checked) => {
73 events.push((range, MarkdownEvent::TaskListMarker(checked)))
74 }
75 }
76 }
77 events
78}
79
/// A static-lifetime equivalent of pulldown_cmark::Event so we can cache the
/// parse result for rendering without resorting to unsafe lifetime coercion.
///
/// The variants carry no text of their own; `parse_markdown` pairs every event
/// with the byte range of the source it was produced from.
#[derive(Clone, Debug, PartialEq)]
pub enum MarkdownEvent {
    /// Start of a tagged element. Events that are yielded after this event
    /// and before its corresponding `End` event are inside this element.
    /// Start and end events are guaranteed to be balanced.
    Start(MarkdownTag),
    /// End of a tagged element.
    End(MarkdownTagEnd),
    /// A text node.
    Text,
    /// An inline code node.
    Code,
    /// An HTML node.
    Html,
    /// An inline HTML node.
    InlineHtml,
    /// A reference to a footnote with given label, which may or may not be defined
    /// by an event with a `Tag::FootnoteDefinition` tag. Definitions and references to them may
    /// occur in any order.
    FootnoteReference,
    /// A soft line break.
    SoftBreak,
    /// A hard line break.
    HardBreak,
    /// A horizontal ruler.
    Rule,
    /// A task list marker, rendered as a checkbox in HTML. Contains `true` when it is checked.
    TaskListMarker(bool),
}
111
/// Tags for elements that can contain other elements.
#[derive(Clone, Debug, PartialEq)]
pub enum MarkdownTag {
    /// A paragraph of text and other inline elements.
    Paragraph,

    /// A heading, with optional identifier, classes and custom attributes.
    /// The identifier is prefixed with `#` and the last one in the attributes
    /// list is chosen, classes are prefixed with `.` and custom attributes
    /// have no prefix and can optionally have a value (`myattr` or `myattr=myvalue`).
    Heading {
        level: HeadingLevel,
        id: Option<SharedString>,
        classes: Vec<SharedString>,
        /// The first item of the tuple is the attr and second one the value.
        attrs: Vec<(SharedString, Option<SharedString>)>,
    },

    /// A block quote.
    BlockQuote,

    /// A code block.
    CodeBlock(CodeBlockKind),

    /// An HTML block.
    HtmlBlock,

    /// A list. If the list is ordered the field indicates the number of the first item.
    /// Contains only list items.
    List(Option<u64>), // TODO: add delim and tight for ast (not needed for html)

    /// A list item.
    Item,

    /// A footnote definition. The value contained is the footnote's label by which it can
    /// be referred to.
    FootnoteDefinition(SharedString),

    /// A table. Contains a vector describing the text-alignment for each of its columns.
    Table(Vec<Alignment>),

    /// A table header. Contains only `TableCell`s. Note that the table body starts immediately
    /// after the closure of the `TableHead` tag. There is no `TableBody` tag.
    TableHead,

    /// A table row. Used for both header rows and body rows. Contains only `TableCell`s.
    TableRow,
    /// A table cell.
    TableCell,

    // span-level tags
    /// Emphasized text.
    Emphasis,
    /// Strongly emphasized text.
    Strong,
    /// Struck-through text.
    Strikethrough,

    /// A link.
    Link {
        link_type: LinkType,
        dest_url: SharedString,
        title: SharedString,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: SharedString,
    },

    /// An image, described by its link type, destination URL, title and link identifier.
    Image {
        link_type: LinkType,
        dest_url: SharedString,
        title: SharedString,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: SharedString,
    },

    /// A metadata block.
    MetadataBlock(MetadataBlockKind),
}
187
/// The kind of code block carried by `MarkdownTag::CodeBlock`.
#[derive(Clone, Debug, PartialEq)]
pub enum CodeBlockKind {
    /// An indented code block (the counterpart of `pulldown_cmark::CodeBlockKind::Indented`).
    Indented,
    /// The value contained in the tag describes the language of the code, which may be empty.
    Fenced(SharedString),
}
194
195impl From<pulldown_cmark::Tag<'_>> for MarkdownTag {
196 fn from(tag: pulldown_cmark::Tag) -> Self {
197 match tag {
198 pulldown_cmark::Tag::Paragraph => MarkdownTag::Paragraph,
199 pulldown_cmark::Tag::Heading {
200 level,
201 id,
202 classes,
203 attrs,
204 } => {
205 let id = id.map(|id| SharedString::from(id.into_string()));
206 let classes = classes
207 .into_iter()
208 .map(|c| SharedString::from(c.into_string()))
209 .collect();
210 let attrs = attrs
211 .into_iter()
212 .map(|(key, value)| {
213 (
214 SharedString::from(key.into_string()),
215 value.map(|v| SharedString::from(v.into_string())),
216 )
217 })
218 .collect();
219 MarkdownTag::Heading {
220 level,
221 id,
222 classes,
223 attrs,
224 }
225 }
226 pulldown_cmark::Tag::BlockQuote => MarkdownTag::BlockQuote,
227 pulldown_cmark::Tag::CodeBlock(kind) => match kind {
228 pulldown_cmark::CodeBlockKind::Indented => {
229 MarkdownTag::CodeBlock(CodeBlockKind::Indented)
230 }
231 pulldown_cmark::CodeBlockKind::Fenced(info) => MarkdownTag::CodeBlock(
232 CodeBlockKind::Fenced(SharedString::from(info.into_string())),
233 ),
234 },
235 pulldown_cmark::Tag::List(start_number) => MarkdownTag::List(start_number),
236 pulldown_cmark::Tag::Item => MarkdownTag::Item,
237 pulldown_cmark::Tag::FootnoteDefinition(label) => {
238 MarkdownTag::FootnoteDefinition(SharedString::from(label.to_string()))
239 }
240 pulldown_cmark::Tag::Table(alignments) => MarkdownTag::Table(alignments),
241 pulldown_cmark::Tag::TableHead => MarkdownTag::TableHead,
242 pulldown_cmark::Tag::TableRow => MarkdownTag::TableRow,
243 pulldown_cmark::Tag::TableCell => MarkdownTag::TableCell,
244 pulldown_cmark::Tag::Emphasis => MarkdownTag::Emphasis,
245 pulldown_cmark::Tag::Strong => MarkdownTag::Strong,
246 pulldown_cmark::Tag::Strikethrough => MarkdownTag::Strikethrough,
247 pulldown_cmark::Tag::Link {
248 link_type,
249 dest_url,
250 title,
251 id,
252 } => MarkdownTag::Link {
253 link_type,
254 dest_url: SharedString::from(dest_url.into_string()),
255 title: SharedString::from(title.into_string()),
256 id: SharedString::from(id.into_string()),
257 },
258 pulldown_cmark::Tag::Image {
259 link_type,
260 dest_url,
261 title,
262 id,
263 } => MarkdownTag::Image {
264 link_type,
265 dest_url: SharedString::from(dest_url.into_string()),
266 title: SharedString::from(title.into_string()),
267 id: SharedString::from(id.into_string()),
268 },
269 pulldown_cmark::Tag::HtmlBlock => MarkdownTag::HtmlBlock,
270 pulldown_cmark::Tag::MetadataBlock(kind) => MarkdownTag::MetadataBlock(kind),
271 }
272 }
273}