diff --git a/crates/terminal/src/terminal_hyperlinks.rs b/crates/terminal/src/terminal_hyperlinks.rs index 25db02c5e84f692622a1c97ed891c886b02b26a9..3c20261988a7b30e124000bcdae7596c162d0853 100644 --- a/crates/terminal/src/terminal_hyperlinks.rs +++ b/crates/terminal/src/terminal_hyperlinks.rs @@ -79,7 +79,8 @@ pub(super) fn find_from_grid_point( Some((url, true, url_match)) } else if let Some(url_match) = regex_match_at(term, point, &mut regex_searches.url_regex) { let url = term.bounds_to_string(*url_match.start(), *url_match.end()); - Some((url, true, url_match)) + let (sanitized_url, sanitized_match) = sanitize_url_punctuation(url, url_match, term); + Some((sanitized_url, true, sanitized_match)) } else if let Some(python_match) = regex_match_at(term, point, &mut regex_searches.python_file_line_regex) { @@ -164,6 +165,63 @@ pub(super) fn find_from_grid_point( }) } +fn sanitize_url_punctuation( + url: String, + url_match: Match, + term: &Term, +) -> (String, Match) { + let mut sanitized_url = url; + let mut chars_trimmed = 0; + + // First, handle parentheses balancing using single traversal + let (open_parens, close_parens) = + sanitized_url + .chars() + .fold((0, 0), |(opens, closes), c| match c { + '(' => (opens + 1, closes), + ')' => (opens, closes + 1), + _ => (opens, closes), + }); + + // Trim unbalanced closing parentheses + if close_parens > open_parens { + let mut remaining_close = close_parens; + while sanitized_url.ends_with(')') && remaining_close > open_parens { + sanitized_url.pop(); + chars_trimmed += 1; + remaining_close -= 1; + } + } + + // Handle trailing periods + if sanitized_url.ends_with('.') { + let trailing_periods = sanitized_url + .chars() + .rev() + .take_while(|&c| c == '.') + .count(); + + if trailing_periods > 1 { + sanitized_url.truncate(sanitized_url.len() - trailing_periods); + chars_trimmed += trailing_periods; + } else if trailing_periods == 1 + && let Some(second_last_char) = sanitized_url.chars().rev().nth(1) + && (second_last_char.is_alphanumeric() || second_last_char == '/') + { + sanitized_url.pop(); + chars_trimmed += 1; + } + } + + if chars_trimmed > 0 { + let new_end = url_match.end().sub(term, Boundary::Grid, chars_trimmed); + let sanitized_match = Match::new(*url_match.start(), new_end); + (sanitized_url, sanitized_match) + } else { + (sanitized_url, url_match) + } +} + fn is_path_surrounded_by_common_symbols(path: &str) -> bool { // Avoid detecting `[]` or `()` strings as paths, surrounded by common symbols path.len() > 2 @@ -233,6 +291,91 @@ mod tests { ); } + #[test] + fn test_url_parentheses_sanitization() { + // Test our sanitize_url_parentheses function directly + let test_cases = vec![ + // Cases that should be sanitized (unbalanced parentheses) + ("https://www.google.com/)", "https://www.google.com/"), + ("https://example.com/path)", "https://example.com/path"), + ("https://test.com/))", "https://test.com/"), + // Cases that should NOT be sanitized (balanced parentheses) + ( + "https://en.wikipedia.org/wiki/Example_(disambiguation)", + "https://en.wikipedia.org/wiki/Example_(disambiguation)", + ), + ("https://test.com/(hello)", "https://test.com/(hello)"), + ( + "https://example.com/path(1)(2)", + "https://example.com/path(1)(2)", + ), + // Edge cases + ("https://test.com/", "https://test.com/"), + ("https://example.com", "https://example.com"), + ]; + + for (input, expected) in test_cases { + // Create a minimal terminal for testing + let term = Term::new(Config::default(), &TermSize::new(80, 24), VoidListener); + + // Create a dummy match that spans the entire input + let start_point = AlacPoint::new(Line(0), Column(0)); + let end_point = AlacPoint::new(Line(0), Column(input.len())); + let dummy_match = Match::new(start_point, end_point); + + let (result, _) = sanitize_url_punctuation(input.to_string(), dummy_match, &term); + assert_eq!(result, expected, "Failed for input: {}", input); + } + } + + #[test] + fn test_url_periods_sanitization() { + // Test URLs with trailing periods (sentence punctuation) + let test_cases = vec![ + // Cases that should be sanitized (trailing periods likely punctuation) + ("https://example.com.", "https://example.com"), + ( + "https://github.com/zed-industries/zed.", + "https://github.com/zed-industries/zed", + ), + ( + "https://example.com/path/file.html.", + "https://example.com/path/file.html", + ), + ( + "https://example.com/file.pdf.", + "https://example.com/file.pdf", + ), + ("https://example.com:8080.", "https://example.com:8080"), + ("https://example.com..", "https://example.com"), + ( + "https://en.wikipedia.org/wiki/C.E.O.", + "https://en.wikipedia.org/wiki/C.E.O", + ), + // Cases that should NOT be sanitized (periods are part of URL structure) + ( + "https://example.com/v1.0/api", + "https://example.com/v1.0/api", + ), + ("https://192.168.1.1", "https://192.168.1.1"), + ("https://sub.domain.com", "https://sub.domain.com"), + ]; + + for (input, expected) in test_cases { + // Create a minimal terminal for testing + let term = Term::new(Config::default(), &TermSize::new(80, 24), VoidListener); + + // Create a dummy match that spans the entire input + let start_point = AlacPoint::new(Line(0), Column(0)); + let end_point = AlacPoint::new(Line(0), Column(input.len())); + let dummy_match = Match::new(start_point, end_point); + + // This test should initially fail since we haven't implemented period sanitization yet + let (result, _) = sanitize_url_punctuation(input.to_string(), dummy_match, &term); + assert_eq!(result, expected, "Failed for input: {}", input); + } + } + #[test] fn test_word_regex() { re_test(