Check for local files from within surrounding parens (#44733)

KyleBarton created

Closes #18228

We parse local clickable links by looking for start/end based on
whitespace. However, this means that we don't catch links which are
embedded in parenthesis, such as in markdown syntax:
`[here's my link text](./path/to/file.txt)`

Parsing strictly against parenthesis can be problematic, because
strictly-speaking, files can have parenthesis. This is a valid file name
in at least MacOS:
`thisfilehas)parens.txt`

Therefore, this change adds a small regex layer on top of the filename
finding logic, which parses out text within parenthesis. If any are
found, they are checked for being a valid filepath. The original
filename string is also checked in order to preserve behavior.

Before:


https://github.com/user-attachments/assets/37f60335-e947-4879-9ca2-88a33f5781f5

After:



https://github.com/user-attachments/assets/bd10649e-ad74-43da-80f4-3e7fd56abd86



Release Notes:

- Improved link parsing for cases when a link is embedded in
parenthesis, e.g. markdown

Change summary

crates/editor/src/hover_links.rs | 124 +++++++++++++++++++++++++++++----
1 file changed, 108 insertions(+), 16 deletions(-)

Detailed changes

crates/editor/src/hover_links.rs 🔗

@@ -9,8 +9,10 @@ use language::{Bias, ToOffset};
 use linkify::{LinkFinder, LinkKind};
 use lsp::LanguageServerId;
 use project::{InlayId, LocationLink, Project, ResolvedPath};
+use regex::Regex;
 use settings::Settings;
-use std::ops::Range;
+use std::{ops::Range, sync::LazyLock};
+use text::OffsetRangeExt;
 use theme::ActiveTheme as _;
 use util::{ResultExt, TryFutureExt as _, maybe};
 
@@ -595,7 +597,8 @@ pub(crate) async fn find_file(
     let project = project?;
     let snapshot = buffer.read_with(cx, |buffer, _| buffer.snapshot()).ok()?;
     let scope = snapshot.language_scope_at(position);
-    let (range, candidate_file_path) = surrounding_filename(snapshot, position)?;
+    let (range, candidate_file_path) = surrounding_filename(&snapshot, position)?;
+    let candidate_len = candidate_file_path.len();
 
     async fn check_path(
         candidate_file_path: &str,
@@ -612,29 +615,66 @@ pub(crate) async fn find_file(
             .filter(|s| s.is_file())
     }
 
-    if let Some(existing_path) = check_path(&candidate_file_path, &project, buffer, cx).await {
-        return Some((range, existing_path));
+    let pattern_candidates = link_pattern_file_candidates(&candidate_file_path);
+
+    for (pattern_candidate, pattern_range) in &pattern_candidates {
+        if let Some(existing_path) = check_path(&pattern_candidate, &project, buffer, cx).await {
+            let offset_range = range.to_offset(&snapshot);
+            let actual_start = offset_range.start + pattern_range.start;
+            let actual_end = offset_range.end - (candidate_len - pattern_range.end);
+            return Some((
+                snapshot.anchor_before(actual_start)..snapshot.anchor_after(actual_end),
+                existing_path,
+            ));
+        }
     }
-
     if let Some(scope) = scope {
-        for suffix in scope.path_suffixes() {
-            if candidate_file_path.ends_with(format!(".{suffix}").as_str()) {
-                continue;
-            }
+        for (pattern_candidate, pattern_range) in pattern_candidates {
+            for suffix in scope.path_suffixes() {
+                if pattern_candidate.ends_with(format!(".{suffix}").as_str()) {
+                    continue;
+                }
 
-            let suffixed_candidate = format!("{candidate_file_path}.{suffix}");
-            if let Some(existing_path) = check_path(&suffixed_candidate, &project, buffer, cx).await
-            {
-                return Some((range, existing_path));
+                let suffixed_candidate = format!("{pattern_candidate}.{suffix}");
+                if let Some(existing_path) =
+                    check_path(&suffixed_candidate, &project, buffer, cx).await
+                {
+                    let offset_range = range.to_offset(&snapshot);
+                    let actual_start = offset_range.start + pattern_range.start;
+                    let actual_end = offset_range.end - (candidate_len - pattern_range.end);
+                    return Some((
+                        snapshot.anchor_before(actual_start)..snapshot.anchor_after(actual_end),
+                        existing_path,
+                    ));
+                }
             }
         }
     }
-
     None
 }
 
+// Tries to capture potentially inlined links, like those found in markdown,
+// e.g. [LinkTitle](link_file.txt)
+// Since files can have parens, we should always return the full string
+// (literally, [LinkTitle](link_file.txt)) as a candidate.
+fn link_pattern_file_candidates(candidate: &str) -> Vec<(String, Range<usize>)> {
+    static MD_LINK_REGEX: LazyLock<Regex> =
+        LazyLock::new(|| Regex::new(r"\(([^)]*)\)").expect("Failed to create REGEX"));
+
+    let candidate_len = candidate.len();
+
+    let mut candidates = vec![(candidate.to_string(), 0..candidate_len)];
+
+    if let Some(captures) = MD_LINK_REGEX.captures(candidate) {
+        if let Some(link) = captures.get(1) {
+            candidates.push((link.as_str().to_string(), link.range()));
+        }
+    }
+    candidates
+}
+
 fn surrounding_filename(
-    snapshot: language::BufferSnapshot,
+    snapshot: &language::BufferSnapshot,
     position: text::Anchor,
 ) -> Option<(Range<text::Anchor>, String)> {
     const LIMIT: usize = 2048;
@@ -1316,6 +1356,58 @@ mod tests {
         assert_eq!(cx.opened_url(), Some("https://zed.dev/releases".into()));
     }
 
+    #[test]
+    fn test_link_pattern_file_candidates() {
+        let candidates: Vec<String> = link_pattern_file_candidates("[LinkTitle](link_file.txt)")
+            .into_iter()
+            .map(|(c, _)| c)
+            .collect();
+        assert_eq!(
+            candidates,
+            vec!["[LinkTitle](link_file.txt)", "link_file.txt",]
+        );
+        // Link title with spaces in it
+        let candidates: Vec<String> = link_pattern_file_candidates("LinkTitle](link_file.txt)")
+            .into_iter()
+            .map(|(c, _)| c)
+            .collect();
+        assert_eq!(
+            candidates,
+            vec!["LinkTitle](link_file.txt)", "link_file.txt",]
+        );
+
+        // Link with spaces
+        let candidates: Vec<String> = link_pattern_file_candidates("LinkTitle](link\\ _file.txt)")
+            .into_iter()
+            .map(|(c, _)| c)
+            .collect();
+
+        assert_eq!(
+            candidates,
+            vec!["LinkTitle](link\\ _file.txt)", "link\\ _file.txt",]
+        );
+        //
+        // Square brackets not strictly necessary
+        let candidates: Vec<String> = link_pattern_file_candidates("(link_file.txt)")
+            .into_iter()
+            .map(|(c, _)| c)
+            .collect();
+
+        assert_eq!(candidates, vec!["(link_file.txt)", "link_file.txt",]);
+
+        // No nesting
+        let candidates: Vec<String> =
+            link_pattern_file_candidates("LinkTitle](link_(link_file)file.txt)")
+                .into_iter()
+                .map(|(c, _)| c)
+                .collect();
+
+        assert_eq!(
+            candidates,
+            vec!["LinkTitle](link_(link_file)file.txt)", "link_(link_file",]
+        )
+    }
+
     #[gpui::test]
     async fn test_surrounding_filename(cx: &mut gpui::TestAppContext) {
         init_test(cx, |_| {});
@@ -1374,7 +1466,7 @@ mod tests {
                 (positions, snapshot)
             });
 
-            let result = surrounding_filename(snapshot, position);
+            let result = surrounding_filename(&snapshot, position);
 
             if let Some(expected) = expected {
                 assert!(result.is_some(), "Failed to find file path: {}", input);