terminal: Re-add sanitizing trailing periods in URL detection (#38569)

Jakub Konka created

I accidentally regressed this when bumping alacritty in
https://github.com/zed-industries/zed/pull/38505

cc @davewa 

Release Notes:

- N/A

Change summary

crates/terminal/src/terminal_hyperlinks.rs | 145 +++++++++++++++++++++++
1 file changed, 144 insertions(+), 1 deletion(-)

Detailed changes

crates/terminal/src/terminal_hyperlinks.rs 🔗

@@ -79,7 +79,8 @@ pub(super) fn find_from_grid_point<T: EventListener>(
         Some((url, true, url_match))
     } else if let Some(url_match) = regex_match_at(term, point, &mut regex_searches.url_regex) {
         let url = term.bounds_to_string(*url_match.start(), *url_match.end());
-        Some((url, true, url_match))
+        let (sanitized_url, sanitized_match) = sanitize_url_punctuation(url, url_match, term);
+        Some((sanitized_url, true, sanitized_match))
     } else if let Some(python_match) =
         regex_match_at(term, point, &mut regex_searches.python_file_line_regex)
     {
@@ -164,6 +165,63 @@ pub(super) fn find_from_grid_point<T: EventListener>(
     })
 }
 
+fn sanitize_url_punctuation<T: EventListener>(
+    url: String,
+    url_match: Match,
+    term: &Term<T>,
+) -> (String, Match) {
+    let mut sanitized_url = url;
+    let mut chars_trimmed = 0;
+
+    // First, handle parentheses balancing using single traversal
+    let (open_parens, close_parens) =
+        sanitized_url
+            .chars()
+            .fold((0, 0), |(opens, closes), c| match c {
+                '(' => (opens + 1, closes),
+                ')' => (opens, closes + 1),
+                _ => (opens, closes),
+            });
+
+    // Trim unbalanced closing parentheses
+    if close_parens > open_parens {
+        let mut remaining_close = close_parens;
+        while sanitized_url.ends_with(')') && remaining_close > open_parens {
+            sanitized_url.pop();
+            chars_trimmed += 1;
+            remaining_close -= 1;
+        }
+    }
+
+    // Handle trailing periods
+    if sanitized_url.ends_with('.') {
+        let trailing_periods = sanitized_url
+            .chars()
+            .rev()
+            .take_while(|&c| c == '.')
+            .count();
+
+        if trailing_periods > 1 {
+            sanitized_url.truncate(sanitized_url.len() - trailing_periods);
+            chars_trimmed += trailing_periods;
+        } else if trailing_periods == 1
+            && let Some(second_last_char) = sanitized_url.chars().rev().nth(1)
+            && (second_last_char.is_alphanumeric() || second_last_char == '/')
+        {
+            sanitized_url.pop();
+            chars_trimmed += 1;
+        }
+    }
+
+    if chars_trimmed > 0 {
+        let new_end = url_match.end().sub(term, Boundary::Grid, chars_trimmed);
+        let sanitized_match = Match::new(*url_match.start(), new_end);
+        (sanitized_url, sanitized_match)
+    } else {
+        (sanitized_url, url_match)
+    }
+}
+
 fn is_path_surrounded_by_common_symbols(path: &str) -> bool {
     // Avoid detecting `[]` or `()` strings as paths, surrounded by common symbols
     path.len() > 2
@@ -233,6 +291,91 @@ mod tests {
         );
     }
 
+    #[test]
+    fn test_url_parentheses_sanitization() {
+        // Test our sanitize_url_parentheses function directly
+        let test_cases = vec![
+            // Cases that should be sanitized (unbalanced parentheses)
+            ("https://www.google.com/)", "https://www.google.com/"),
+            ("https://example.com/path)", "https://example.com/path"),
+            ("https://test.com/))", "https://test.com/"),
+            // Cases that should NOT be sanitized (balanced parentheses)
+            (
+                "https://en.wikipedia.org/wiki/Example_(disambiguation)",
+                "https://en.wikipedia.org/wiki/Example_(disambiguation)",
+            ),
+            ("https://test.com/(hello)", "https://test.com/(hello)"),
+            (
+                "https://example.com/path(1)(2)",
+                "https://example.com/path(1)(2)",
+            ),
+            // Edge cases
+            ("https://test.com/", "https://test.com/"),
+            ("https://example.com", "https://example.com"),
+        ];
+
+        for (input, expected) in test_cases {
+            // Create a minimal terminal for testing
+            let term = Term::new(Config::default(), &TermSize::new(80, 24), VoidListener);
+
+            // Create a dummy match that spans the entire input
+            let start_point = AlacPoint::new(Line(0), Column(0));
+            let end_point = AlacPoint::new(Line(0), Column(input.len()));
+            let dummy_match = Match::new(start_point, end_point);
+
+            let (result, _) = sanitize_url_punctuation(input.to_string(), dummy_match, &term);
+            assert_eq!(result, expected, "Failed for input: {}", input);
+        }
+    }
+
+    #[test]
+    fn test_url_periods_sanitization() {
+        // Test URLs with trailing periods (sentence punctuation)
+        let test_cases = vec![
+            // Cases that should be sanitized (trailing periods likely punctuation)
+            ("https://example.com.", "https://example.com"),
+            (
+                "https://github.com/zed-industries/zed.",
+                "https://github.com/zed-industries/zed",
+            ),
+            (
+                "https://example.com/path/file.html.",
+                "https://example.com/path/file.html",
+            ),
+            (
+                "https://example.com/file.pdf.",
+                "https://example.com/file.pdf",
+            ),
+            ("https://example.com:8080.", "https://example.com:8080"),
+            ("https://example.com..", "https://example.com"),
+            (
+                "https://en.wikipedia.org/wiki/C.E.O.",
+                "https://en.wikipedia.org/wiki/C.E.O",
+            ),
+            // Cases that should NOT be sanitized (periods are part of URL structure)
+            (
+                "https://example.com/v1.0/api",
+                "https://example.com/v1.0/api",
+            ),
+            ("https://192.168.1.1", "https://192.168.1.1"),
+            ("https://sub.domain.com", "https://sub.domain.com"),
+        ];
+
+        for (input, expected) in test_cases {
+            // Create a minimal terminal for testing
+            let term = Term::new(Config::default(), &TermSize::new(80, 24), VoidListener);
+
+            // Create a dummy match that spans the entire input
+            let start_point = AlacPoint::new(Line(0), Column(0));
+            let end_point = AlacPoint::new(Line(0), Column(input.len()));
+            let dummy_match = Match::new(start_point, end_point);
+
+            // This test should initially fail since we haven't implemented period sanitization yet
+            let (result, _) = sanitize_url_punctuation(input.to_string(), dummy_match, &term);
+            assert_eq!(result, expected, "Failed for input: {}", input);
+        }
+    }
+
     #[test]
     fn test_word_regex() {
         re_test(