markdown_preview: Support anchor link for headings (#53184)

Dong and Smit Barmase created

## What does this PR do

- Generate [GitHub-flavored heading
slugs](https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax#section-links)
for markdown headings
- Handle `[label](#heading)` same-document anchor links that scroll the
preview and editor to the target heading
- Handle `[label](./file.md#heading)` cross-file anchor links that open
the file, scroll the preview, and move the editor cursor to the heading


https://github.com/user-attachments/assets/ecc468bf-bed0-4543-a988-703025a61bf8

## What to test

- [ ] Create a markdown file with `[Go to section](#section-name)`
links and verify that clicking them scrolls both the preview and the editor
- [ ] Create two markdown files with cross-file links like `[See
other](./other.md#heading)` and verify that the file opens and the preview
scrolls to the heading
- [ ] Verify duplicate headings produce correct slugs (`heading`,
`heading-1`)
- [ ] Verify external URLs (`https://...`) are unaffected

Self-Review Checklist:

- [x] I've reviewed my own diff for quality, security, and reliability
- [ ] Unsafe blocks (if any) have justifying comments
- [x] The content is consistent with the [UI/UX
checklist](https://github.com/zed-industries/zed/blob/main/CONTRIBUTING.md#uiux-checklist)
- [x] Tests cover the new/changed behavior
- [ ] Performance impact has been considered and is acceptable

Closes #18699

Release Notes:

- Added support for anchor links for headings in Markdown Preview.

---------

Co-authored-by: Smit Barmase <heysmitbarmase@gmail.com>

Change summary

crates/markdown/src/markdown.rs                      |  23 +
crates/markdown/src/mermaid.rs                       |   2 
crates/markdown/src/parser.rs                        | 152 ++++++++++++-
crates/markdown_preview/src/markdown_preview_view.rs |  70 ++++++
crates/util/src/markdown.rs                          | 108 +++++++++
5 files changed, 332 insertions(+), 23 deletions(-)

Detailed changes

crates/markdown/src/markdown.rs 🔗

@@ -272,6 +272,7 @@ pub struct MarkdownOptions {
     pub parse_links_only: bool,
     pub parse_html: bool,
     pub render_mermaid_diagrams: bool,
+    pub parse_heading_slugs: bool,
 }
 
 #[derive(Clone, Copy, PartialEq, Eq)]
@@ -498,6 +499,16 @@ impl Markdown {
         self.pending_parse.is_some()
     }
 
+    pub fn scroll_to_heading(&mut self, slug: &str, cx: &mut Context<Self>) -> Option<usize> {
+        if let Some(source_index) = self.parsed_markdown.heading_slugs.get(slug).copied() {
+            self.autoscroll_request = Some(source_index);
+            cx.notify();
+            Some(source_index)
+        } else {
+            None
+        }
+    }
+
     pub fn source(&self) -> &str {
         &self.source
     }
@@ -669,6 +680,7 @@ impl Markdown {
         let should_parse_links_only = self.options.parse_links_only;
         let should_parse_html = self.options.parse_html;
         let should_render_mermaid_diagrams = self.options.render_mermaid_diagrams;
+        let should_parse_heading_slugs = self.options.parse_heading_slugs;
         let language_registry = self.language_registry.clone();
         let fallback = self.fallback_code_block_language.clone();
 
@@ -683,17 +695,20 @@ impl Markdown {
                         root_block_starts: Arc::default(),
                         html_blocks: BTreeMap::default(),
                         mermaid_diagrams: BTreeMap::default(),
+                        heading_slugs: HashMap::default(),
                     },
                     Default::default(),
                 );
             }
 
-            let parsed = parse_markdown_with_options(&source, should_parse_html);
+            let parsed =
+                parse_markdown_with_options(&source, should_parse_html, should_parse_heading_slugs);
             let events = parsed.events;
             let language_names = parsed.language_names;
             let paths = parsed.language_paths;
             let root_block_starts = parsed.root_block_starts;
             let html_blocks = parsed.html_blocks;
+            let heading_slugs = parsed.heading_slugs;
             let mermaid_diagrams = if should_render_mermaid_diagrams {
                 extract_mermaid_diagrams(&source, &events)
             } else {
@@ -760,6 +775,7 @@ impl Markdown {
                     root_block_starts: Arc::from(root_block_starts),
                     html_blocks,
                     mermaid_diagrams,
+                    heading_slugs,
                 },
                 images_by_source_offset,
             )
@@ -883,6 +899,7 @@ pub struct ParsedMarkdown {
     pub root_block_starts: Arc<[usize]>,
     pub(crate) html_blocks: BTreeMap<usize, html::html_parser::ParsedHtmlBlock>,
     pub(crate) mermaid_diagrams: BTreeMap<usize, ParsedMarkdownMermaidDiagram>,
+    pub heading_slugs: HashMap<SharedString, usize>,
 }
 
 impl ParsedMarkdown {
@@ -3120,7 +3137,7 @@ mod tests {
     #[test]
     fn test_table_checkbox_detection() {
         let md = "| Done |\n|------|\n| [x] |\n| [ ] |";
-        let events = crate::parser::parse_markdown_with_options(md, false).events;
+        let events = crate::parser::parse_markdown_with_options(md, false, false).events;
 
         let mut in_table = false;
         let mut cell_texts: Vec<String> = Vec::new();
@@ -3338,7 +3355,7 @@ mod tests {
     }
 
     fn has_code_block(markdown: &str) -> bool {
-        let parsed_data = parse_markdown_with_options(markdown, false);
+        let parsed_data = parse_markdown_with_options(markdown, false, false);
         parsed_data
             .events
             .iter()

crates/markdown/src/mermaid.rs 🔗

@@ -371,7 +371,7 @@ mod tests {
     #[test]
     fn test_extract_mermaid_diagrams_parses_scale() {
         let markdown = "```mermaid 150\ngraph TD;\n```\n\n```rust\nfn main() {}\n```";
-        let events = crate::parser::parse_markdown_with_options(markdown, false).events;
+        let events = crate::parser::parse_markdown_with_options(markdown, false, false).events;
         let diagrams = extract_mermaid_diagrams(markdown, &events);
 
         assert_eq!(diagrams.len(), 1);

crates/markdown/src/parser.rs 🔗

@@ -1,12 +1,12 @@
+use collections::{BTreeMap, HashMap, HashSet};
 use gpui::SharedString;
 use linkify::LinkFinder;
 pub use pulldown_cmark::TagEnd as MarkdownTagEnd;
 use pulldown_cmark::{
     Alignment, CowStr, HeadingLevel, LinkType, MetadataBlockKind, Options, Parser,
 };
-use std::{collections::BTreeMap, ops::Range, sync::Arc};
-
-use collections::HashSet;
+use std::{ops::Range, sync::Arc};
+use util::markdown::generate_heading_slug;
 
 use crate::{html, path_range::PathWithRange};
 
@@ -37,6 +37,7 @@ pub(crate) struct ParsedMarkdownData {
     pub language_paths: HashSet<Arc<str>>,
     pub root_block_starts: Vec<usize>,
     pub html_blocks: BTreeMap<usize, html::html_parser::ParsedHtmlBlock>,
+    pub heading_slugs: HashMap<SharedString, usize>,
 }
 
 impl ParseState {
@@ -80,7 +81,78 @@ impl ParseState {
     }
 }
 
-pub(crate) fn parse_markdown_with_options(text: &str, parse_html: bool) -> ParsedMarkdownData {
+const MAX_DUPLICATE_HEADING_SLUGS: usize = 128;
+
+fn build_heading_slugs(
+    source: &str,
+    events: &[(Range<usize>, MarkdownEvent)],
+) -> HashMap<SharedString, usize> {
+    let mut slugs = HashMap::default();
+    let mut slug_counts: HashMap<String, usize> = HashMap::default();
+    let mut inside_heading = false;
+    let mut heading_text = String::new();
+    let mut heading_source_start: Option<usize> = None;
+
+    for (range, event) in events {
+        match event {
+            MarkdownEvent::Start(MarkdownTag::Heading { .. }) => {
+                inside_heading = true;
+                heading_text.clear();
+                heading_source_start = None;
+            }
+            MarkdownEvent::End(MarkdownTagEnd::Heading(_)) => {
+                if inside_heading {
+                    let source_offset = heading_source_start.unwrap_or(range.start);
+                    let base_slug = generate_heading_slug(&heading_text);
+                    let count = slug_counts.entry(base_slug.clone()).or_insert(0);
+                    let mut slug = if *count == 0 {
+                        base_slug.clone()
+                    } else {
+                        format!("{base_slug}-{count}")
+                    };
+                    *count += 1;
+                    while slugs.contains_key(slug.as_str()) {
+                        let Some(count) = slug_counts.get_mut(&base_slug) else {
+                            slug.clear();
+                            break;
+                        };
+                        if *count >= MAX_DUPLICATE_HEADING_SLUGS {
+                            slug.clear();
+                            break;
+                        }
+                        slug = format!("{base_slug}-{count}");
+                        *count += 1;
+                    }
+                    if !slug.is_empty() {
+                        slugs.insert(SharedString::from(slug), source_offset);
+                    }
+                    inside_heading = false;
+                }
+            }
+            MarkdownEvent::Text | MarkdownEvent::Code if inside_heading => {
+                if heading_source_start.is_none() {
+                    heading_source_start = Some(range.start);
+                }
+                heading_text.push_str(&source[range.clone()]);
+            }
+            MarkdownEvent::SubstitutedText(substituted) if inside_heading => {
+                if heading_source_start.is_none() {
+                    heading_source_start = Some(range.start);
+                }
+                heading_text.push_str(substituted);
+            }
+            _ => {}
+        }
+    }
+
+    slugs
+}
+
+pub(crate) fn parse_markdown_with_options(
+    text: &str,
+    parse_html: bool,
+    parse_heading_slugs: bool,
+) -> ParsedMarkdownData {
     let mut state = ParseState::default();
     let mut language_names = HashSet::default();
     let mut language_paths = HashSet::default();
@@ -440,12 +512,19 @@ pub(crate) fn parse_markdown_with_options(text: &str, parse_html: bool) -> Parse
         }
     }
 
+    let heading_slugs = if parse_heading_slugs {
+        build_heading_slugs(text, &state.events)
+    } else {
+        HashMap::default()
+    };
+
     ParsedMarkdownData {
         events: state.events,
         language_names,
         language_paths,
         root_block_starts: state.root_block_starts,
         html_blocks,
+        heading_slugs,
     }
 }
 
@@ -697,7 +776,7 @@ mod tests {
     #[test]
     fn test_html_comments() {
         assert_eq!(
-            parse_markdown_with_options("  <!--\nrdoc-file=string.c\n-->\nReturns", false),
+            parse_markdown_with_options("  <!--\nrdoc-file=string.c\n-->\nReturns", false, false),
             ParsedMarkdownData {
                 events: vec![
                     (2..30, RootStart),
@@ -725,7 +804,8 @@ mod tests {
         assert_eq!(
             parse_markdown_with_options(
                 "&nbsp;&nbsp; https://some.url some \\`&#9658;\\` text",
-                false
+                false,
+                false,
             ),
             ParsedMarkdownData {
                 events: vec![
@@ -764,7 +844,8 @@ mod tests {
         assert_eq!(
             parse_markdown_with_options(
                 "You can use the [GitHub Search API](https://docs.github.com/en",
-                false
+                false,
+                false,
             )
             .events,
             vec![
@@ -797,7 +878,8 @@ mod tests {
         assert_eq!(
             parse_markdown_with_options(
                 "-- --- ... \"double quoted\" 'single quoted' ----------",
-                false
+                false,
+                false,
             ),
             ParsedMarkdownData {
                 events: vec![
@@ -830,7 +912,7 @@ mod tests {
     #[test]
     fn test_code_block_metadata() {
         assert_eq!(
-            parse_markdown_with_options("```rust\nfn main() {\n let a = 1;\n}\n```", false),
+            parse_markdown_with_options("```rust\nfn main() {\n let a = 1;\n}\n```", false, false),
             ParsedMarkdownData {
                 events: vec![
                     (0..37, RootStart),
@@ -858,7 +940,7 @@ mod tests {
             }
         );
         assert_eq!(
-            parse_markdown_with_options("    fn main() {}", false),
+            parse_markdown_with_options("    fn main() {}", false, false),
             ParsedMarkdownData {
                 events: vec![
                     (4..16, RootStart),
@@ -883,7 +965,7 @@ mod tests {
     }
 
     fn assert_code_block_does_not_emit_links(markdown: &str) {
-        let parsed = parse_markdown_with_options(markdown, false);
+        let parsed = parse_markdown_with_options(markdown, false, false);
         let mut code_block_depth = 0;
         let mut code_block_count = 0;
         let mut saw_text_inside_code_block = false;
@@ -937,7 +1019,7 @@ mod tests {
     #[test]
     fn test_metadata_blocks_do_not_affect_root_blocks() {
         assert_eq!(
-            parse_markdown_with_options("+++\ntitle = \"Example\"\n+++\n\nParagraph", false),
+            parse_markdown_with_options("+++\ntitle = \"Example\"\n+++\n\nParagraph", false, false),
             ParsedMarkdownData {
                 events: vec![
                     (27..36, RootStart),
@@ -959,7 +1041,7 @@ mod tests {
 |------|---------|
 | [x]  | Fix bug |
 | [ ]  | Add feature |";
-        let parsed = parse_markdown_with_options(markdown, false);
+        let parsed = parse_markdown_with_options(markdown, false, false);
 
         let mut in_table = false;
         let mut saw_task_list_marker = false;
@@ -1038,7 +1120,8 @@ mod tests {
         assert_eq!(
             parse_markdown_with_options(
                 "https:/\\/example.com is equivalent to https://example&#46;com!",
-                false
+                false,
+                false,
             )
             .events,
             vec![
@@ -1079,7 +1162,8 @@ mod tests {
         assert_eq!(
             parse_markdown_with_options(
                 "Visit https://example.com/cat\\/é&#8205;☕ for coffee!",
-                false
+                false,
+                false,
             )
             .events,
             [
@@ -1106,4 +1190,42 @@ mod tests {
             ]
         );
     }
+
+    #[test]
+    fn test_heading_slugs() {
+        let parsed = parse_markdown_with_options(
+            "# Hello World\n\n## Code `block`\n\n### Third Level\n\n#### Fourth Level\n\n## Hello World",
+            false,
+            true,
+        );
+        assert_eq!(parsed.heading_slugs.len(), 5);
+        assert!(parsed.heading_slugs.contains_key("hello-world"));
+        assert!(parsed.heading_slugs.contains_key("code-block"));
+        assert!(parsed.heading_slugs.contains_key("third-level"));
+        assert!(parsed.heading_slugs.contains_key("fourth-level"));
+        assert!(parsed.heading_slugs.contains_key("hello-world-1"));
+    }
+
+    #[test]
+    fn test_heading_source_index_for_slug() {
+        let parsed = parse_markdown_with_options(
+            "# Duplicate\n\nText\n\n## Duplicate\n\nMore text",
+            false,
+            true,
+        );
+        let first = parsed.heading_slugs.get("duplicate").copied();
+        let second = parsed.heading_slugs.get("duplicate-1").copied();
+        assert!(first.is_some());
+        assert!(second.is_some());
+        assert!(first.expect("first slug missing") < second.expect("second slug missing"));
+    }
+
+    #[test]
+    fn test_heading_slug_collision_with_dedup_suffix() {
+        let parsed = parse_markdown_with_options("# Foo\n\n## Foo\n\n## Foo 1", false, true);
+        assert_eq!(parsed.heading_slugs.len(), 3);
+        assert!(parsed.heading_slugs.contains_key("foo"));
+        assert!(parsed.heading_slugs.contains_key("foo-1"));
+        assert!(parsed.heading_slugs.contains_key("foo-1-1"));
+    }
 }

crates/markdown_preview/src/markdown_preview_view.rs 🔗

@@ -21,6 +21,7 @@ use project::search::SearchQuery;
 use settings::Settings;
 use theme_settings::ThemeSettings;
 use ui::{WithScrollbar, prelude::*};
+use util::markdown::split_local_url_fragment;
 use util::normalize_path;
 use workspace::item::{Item, ItemBufferKind, ItemHandle};
 use workspace::searchable::{
@@ -218,6 +219,7 @@ impl MarkdownPreviewView {
                     MarkdownOptions {
                         parse_html: true,
                         render_mermaid_diagrams: true,
+                        parse_heading_slugs: true,
                         ..Default::default()
                     },
                     cx,
@@ -580,8 +582,6 @@ impl MarkdownPreviewView {
         window: &mut Window,
         cx: &mut Context<Self>,
     ) -> MarkdownElement {
-        let workspace = self.workspace.clone();
-        let base_directory = self.base_directory.clone();
         let active_editor = self
             .active_editor
             .as_ref()
@@ -615,8 +615,20 @@ impl MarkdownPreviewView {
                 )
             }
         })
-        .on_url_click(move |url, window, cx| {
-            open_preview_url(url, base_directory.clone(), &workspace, window, cx);
+        .on_url_click({
+            let view_handle = cx.entity().downgrade();
+            let workspace = self.workspace.clone();
+            let base_directory = self.base_directory.clone();
+            move |url, window, cx| {
+                handle_url_click(
+                    url,
+                    &view_handle,
+                    base_directory.clone(),
+                    &workspace,
+                    window,
+                    cx,
+                );
+            }
         });
 
         if let Some(active_editor) = active_editor {
@@ -655,6 +667,56 @@ impl MarkdownPreviewView {
     }
 }
 
+fn handle_url_click(
+    url: SharedString,
+    view: &WeakEntity<MarkdownPreviewView>,
+    base_directory: Option<PathBuf>,
+    workspace: &WeakEntity<Workspace>,
+    window: &mut Window,
+    cx: &mut App,
+) {
+    let (path_part, fragment) = split_local_url_fragment(url.as_ref());
+
+    if path_part.is_empty() {
+        if let Some(fragment) = fragment {
+            let view = view.clone();
+            let slug = SharedString::from(fragment.to_string());
+            window.defer(cx, move |window, cx| {
+                if let Some(view) = view.upgrade() {
+                    let markdown = view.read(cx).markdown.clone();
+                    let active_editor = view
+                        .read(cx)
+                        .active_editor
+                        .as_ref()
+                        .map(|state| state.editor.clone());
+
+                    let source_index =
+                        markdown.update(cx, |markdown, cx| markdown.scroll_to_heading(&slug, cx));
+
+                    if let Some(source_index) = source_index {
+                        if let Some(editor) = active_editor {
+                            MarkdownPreviewView::move_cursor_to_source_index(
+                                &editor,
+                                source_index,
+                                window,
+                                cx,
+                            );
+                        }
+                    }
+                }
+            });
+        }
+    } else {
+        open_preview_url(
+            SharedString::from(path_part.to_string()),
+            base_directory,
+            workspace,
+            window,
+            cx,
+        );
+    }
+}
+
 fn open_preview_url(
     url: SharedString,
     base_directory: Option<PathBuf>,

crates/util/src/markdown.rs 🔗

@@ -1,5 +1,62 @@
 use std::fmt::{Display, Formatter};
 
+/// Generates a URL-friendly slug from heading text (e.g. "Hello World" → "hello-world").
+pub fn generate_heading_slug(text: &str) -> String {
+    text.trim()
+        .chars()
+        .filter_map(|c| {
+            if c.is_alphanumeric() || c == '-' || c == '_' {
+                Some(c.to_lowercase().next().unwrap_or(c))
+            } else if c == ' ' {
+                Some('-')
+            } else {
+                None
+            }
+        })
+        .collect()
+}
+
+/// Returns true if the URL starts with a URI scheme (RFC 3986 §3.1).
+fn has_uri_scheme(url: &str) -> bool {
+    let mut chars = url.chars();
+    match chars.next() {
+        Some(c) if c.is_ascii_alphabetic() => {}
+        _ => return false,
+    }
+    for c in chars {
+        if c == ':' {
+            return true;
+        }
+        if !(c.is_ascii_alphanumeric() || c == '+' || c == '-' || c == '.') {
+            return false;
+        }
+    }
+    false
+}
+
+/// Splits a relative URL into its path and `#fragment` parts.
+/// Absolute URLs are returned as-is with no fragment.
+pub fn split_local_url_fragment(url: &str) -> (&str, Option<&str>) {
+    if has_uri_scheme(url) {
+        return (url, None);
+    }
+    match url.find('#') {
+        Some(pos) => {
+            let path = &url[..pos];
+            let fragment = &url[pos + 1..];
+            (
+                path,
+                if fragment.is_empty() {
+                    None
+                } else {
+                    Some(fragment)
+                },
+            )
+        }
+        None => (url, None),
+    }
+}
+
 /// Indicates that the wrapped `String` is markdown text.
 #[derive(Debug, Clone)]
 pub struct MarkdownString(pub String);
@@ -265,4 +322,55 @@ mod tests {
             "it can't be downgraded later"
         );
     }
+
+    #[test]
+    fn test_split_local_url_fragment() {
+        assert_eq!(split_local_url_fragment("#heading"), ("", Some("heading")));
+        assert_eq!(
+            split_local_url_fragment("./file.md#heading"),
+            ("./file.md", Some("heading"))
+        );
+        assert_eq!(split_local_url_fragment("./file.md"), ("./file.md", None));
+        assert_eq!(
+            split_local_url_fragment("https://example.com#frag"),
+            ("https://example.com#frag", None)
+        );
+        assert_eq!(
+            split_local_url_fragment("mailto:user@example.com"),
+            ("mailto:user@example.com", None)
+        );
+        assert_eq!(split_local_url_fragment("#"), ("", None));
+        assert_eq!(
+            split_local_url_fragment("../other.md#section"),
+            ("../other.md", Some("section"))
+        );
+        assert_eq!(
+            split_local_url_fragment("123:not-a-scheme#frag"),
+            ("123:not-a-scheme", Some("frag"))
+        );
+    }
+
+    #[test]
+    fn test_generate_heading_slug() {
+        assert_eq!(generate_heading_slug("Hello World"), "hello-world");
+        assert_eq!(generate_heading_slug("Hello  World"), "hello--world");
+        assert_eq!(generate_heading_slug("Hello-World"), "hello-world");
+        assert_eq!(
+            generate_heading_slug("Some **bold** text"),
+            "some-bold-text"
+        );
+        assert_eq!(generate_heading_slug("Let's try with Ü"), "lets-try-with-ü");
+        assert_eq!(
+            generate_heading_slug("heading with 123 numbers"),
+            "heading-with-123-numbers"
+        );
+        assert_eq!(
+            generate_heading_slug("What about (parens)?"),
+            "what-about-parens"
+        );
+        assert_eq!(
+            generate_heading_slug("  leading spaces  "),
+            "leading-spaces"
+        );
+    }
 }