diff --git a/crates/terminal/src/terminal_hyperlinks.rs b/crates/terminal/src/terminal_hyperlinks.rs index cff27c4567cca84b2310723bf73bfda8d58c166d..8ff33895251f707c8bc9a7894bd74b0bb323ae6c 100644 --- a/crates/terminal/src/terminal_hyperlinks.rs +++ b/crates/terminal/src/terminal_hyperlinks.rs @@ -160,8 +160,8 @@ fn sanitize_url_punctuation( let mut sanitized_url = url; let mut chars_trimmed = 0; - // First, handle parentheses balancing using single traversal - let (open_parens, close_parens) = + // Count parentheses in the URL + let (open_parens, mut close_parens) = sanitized_url .chars() .fold((0, 0), |(opens, closes), c| match c { @@ -170,33 +170,27 @@ fn sanitize_url_punctuation( _ => (opens, closes), }); - // Trim unbalanced closing parentheses - if close_parens > open_parens { - let mut remaining_close = close_parens; - while sanitized_url.ends_with(')') && remaining_close > open_parens { - sanitized_url.pop(); - chars_trimmed += 1; - remaining_close -= 1; - } - } + // Remove trailing characters that shouldn't be at the end of URLs + while let Some(last_char) = sanitized_url.chars().last() { + let should_remove = match last_char { + // These may be part of a URL but not at the end. It's not that the spec + // doesn't allow them, but they are frequently used in plain text as delimiters + // where they're not meant to be part of the URL. + '.' | ',' | ':' | ';' => true, + '(' => true, + ')' if close_parens > open_parens => { + close_parens -= 1; + + true + } + _ => false, + }; - // Handle trailing periods - if sanitized_url.ends_with('.') { - let trailing_periods = sanitized_url - .chars() - .rev() - .take_while(|&c| c == '.') - .count(); - - if trailing_periods > 1 { - sanitized_url.truncate(sanitized_url.len() - trailing_periods); - chars_trimmed += trailing_periods; - } else if trailing_periods == 1 - && let Some(second_last_char) = sanitized_url.chars().rev().nth(1) - && (second_last_char.is_alphanumeric() || second_last_char == '/') - { + if should_remove { sanitized_url.pop(); chars_trimmed += 1; + } else { + break; } } @@ -413,6 +407,8 @@ mod tests { ("https://www.google.com/)", "https://www.google.com/"), ("https://example.com/path)", "https://example.com/path"), ("https://test.com/))", "https://test.com/"), + ("https://test.com/(((", "https://test.com/"), + ("https://test.com/(test)(", "https://test.com/(test)"), // Cases that should NOT be sanitized (balanced parentheses) ( "https://en.wikipedia.org/wiki/Example_(disambiguation)", @@ -443,10 +439,10 @@ mod tests { } #[test] - fn test_url_periods_sanitization() { - // Test URLs with trailing periods (sentence punctuation) + fn test_url_punctuation_sanitization() { + // Test URLs with trailing punctuation (sentence/text punctuation) + // The sanitize_url_punctuation function removes ., ,, :, ;, from the end let test_cases = vec![ - // Cases that should be sanitized (trailing periods likely punctuation) ("https://example.com.", "https://example.com"), ( "https://github.com/zed-industries/zed.", @@ -466,13 +462,36 @@ mod tests { "https://en.wikipedia.org/wiki/C.E.O.", "https://en.wikipedia.org/wiki/C.E.O", ), - // Cases that should NOT be sanitized (periods are part of URL structure) + ("https://example.com,", "https://example.com"), + ("https://example.com/path,", "https://example.com/path"), + ("https://example.com,,", "https://example.com"), + ("https://example.com:", "https://example.com"), + ("https://example.com/path:", "https://example.com/path"), + ("https://example.com::", "https://example.com"), + ("https://example.com;", "https://example.com"), + ("https://example.com/path;", "https://example.com/path"), + ("https://example.com;;", "https://example.com"), + ("https://example.com.,", "https://example.com"), + ("https://example.com.:;", "https://example.com"), + ("https://example.com!.", "https://example.com!"), + ("https://example.com/).", "https://example.com/"), + ("https://example.com/);", "https://example.com/"), + ("https://example.com/;)", "https://example.com/"), ( "https://example.com/v1.0/api", "https://example.com/v1.0/api", ), ("https://192.168.1.1", "https://192.168.1.1"), ("https://sub.domain.com", "https://sub.domain.com"), + ( + "https://example.com?query=value", + "https://example.com?query=value", + ), + ("https://example.com?a=1&b=2", "https://example.com?a=1&b=2"), + ( + "https://example.com/path:8080", + "https://example.com/path:8080", + ), ]; for (input, expected) in test_cases { @@ -484,7 +503,6 @@ mod tests { let end_point = AlacPoint::new(Line(0), Column(input.len())); let dummy_match = Match::new(start_point, end_point); - // This test should initially fail since we haven't implemented period sanitization yet let (result, _) = sanitize_url_punctuation(input.to_string(), dummy_match, &term); assert_eq!(result, expected, "Failed for input: {}", input); }