@@ -79,7 +79,8 @@ pub(super) fn find_from_grid_point<T: EventListener>(
Some((url, true, url_match))
} else if let Some(url_match) = regex_match_at(term, point, &mut regex_searches.url_regex) {
let url = term.bounds_to_string(*url_match.start(), *url_match.end());
- Some((url, true, url_match))
+ let (sanitized_url, sanitized_match) = sanitize_url_punctuation(url, url_match, term);
+ Some((sanitized_url, true, sanitized_match))
} else if let Some(python_match) =
regex_match_at(term, point, &mut regex_searches.python_file_line_regex)
{
@@ -164,6 +165,63 @@ pub(super) fn find_from_grid_point<T: EventListener>(
})
}
+fn sanitize_url_punctuation<T: EventListener>(
+ url: String,
+ url_match: Match,
+ term: &Term<T>,
+) -> (String, Match) {
+ let mut sanitized_url = url;
+ let mut chars_trimmed = 0;
+
+ // First, handle parentheses balancing using single traversal
+ let (open_parens, close_parens) =
+ sanitized_url
+ .chars()
+ .fold((0, 0), |(opens, closes), c| match c {
+ '(' => (opens + 1, closes),
+ ')' => (opens, closes + 1),
+ _ => (opens, closes),
+ });
+
+ // Trim unbalanced closing parentheses
+ if close_parens > open_parens {
+ let mut remaining_close = close_parens;
+ while sanitized_url.ends_with(')') && remaining_close > open_parens {
+ sanitized_url.pop();
+ chars_trimmed += 1;
+ remaining_close -= 1;
+ }
+ }
+
+ // Handle trailing periods
+ if sanitized_url.ends_with('.') {
+ let trailing_periods = sanitized_url
+ .chars()
+ .rev()
+ .take_while(|&c| c == '.')
+ .count();
+
+ if trailing_periods > 1 {
+ sanitized_url.truncate(sanitized_url.len() - trailing_periods);
+ chars_trimmed += trailing_periods;
+ } else if trailing_periods == 1
+ && let Some(second_last_char) = sanitized_url.chars().rev().nth(1)
+ && (second_last_char.is_alphanumeric() || second_last_char == '/')
+ {
+ sanitized_url.pop();
+ chars_trimmed += 1;
+ }
+ }
+
+ if chars_trimmed > 0 {
+ let new_end = url_match.end().sub(term, Boundary::Grid, chars_trimmed);
+ let sanitized_match = Match::new(*url_match.start(), new_end);
+ (sanitized_url, sanitized_match)
+ } else {
+ (sanitized_url, url_match)
+ }
+}
+
fn is_path_surrounded_by_common_symbols(path: &str) -> bool {
// Avoid detecting `[]` or `()` strings as paths, surrounded by common symbols
path.len() > 2
@@ -233,6 +291,91 @@ mod tests {
);
}
+ #[test]
+ fn test_url_parentheses_sanitization() {
+ // Test our sanitize_url_parentheses function directly
+ let test_cases = vec![
+ // Cases that should be sanitized (unbalanced parentheses)
+ ("https://www.google.com/)", "https://www.google.com/"),
+ ("https://example.com/path)", "https://example.com/path"),
+ ("https://test.com/))", "https://test.com/"),
+ // Cases that should NOT be sanitized (balanced parentheses)
+ (
+ "https://en.wikipedia.org/wiki/Example_(disambiguation)",
+ "https://en.wikipedia.org/wiki/Example_(disambiguation)",
+ ),
+ ("https://test.com/(hello)", "https://test.com/(hello)"),
+ (
+ "https://example.com/path(1)(2)",
+ "https://example.com/path(1)(2)",
+ ),
+ // Edge cases
+ ("https://test.com/", "https://test.com/"),
+ ("https://example.com", "https://example.com"),
+ ];
+
+ for (input, expected) in test_cases {
+ // Create a minimal terminal for testing
+ let term = Term::new(Config::default(), &TermSize::new(80, 24), VoidListener);
+
+ // Create a dummy match that spans the entire input
+ let start_point = AlacPoint::new(Line(0), Column(0));
+ let end_point = AlacPoint::new(Line(0), Column(input.len()));
+ let dummy_match = Match::new(start_point, end_point);
+
+ let (result, _) = sanitize_url_punctuation(input.to_string(), dummy_match, &term);
+ assert_eq!(result, expected, "Failed for input: {}", input);
+ }
+ }
+
+ #[test]
+ fn test_url_periods_sanitization() {
+ // Test URLs with trailing periods (sentence punctuation)
+ let test_cases = vec![
+ // Cases that should be sanitized (trailing periods likely punctuation)
+ ("https://example.com.", "https://example.com"),
+ (
+ "https://github.com/zed-industries/zed.",
+ "https://github.com/zed-industries/zed",
+ ),
+ (
+ "https://example.com/path/file.html.",
+ "https://example.com/path/file.html",
+ ),
+ (
+ "https://example.com/file.pdf.",
+ "https://example.com/file.pdf",
+ ),
+ ("https://example.com:8080.", "https://example.com:8080"),
+ ("https://example.com..", "https://example.com"),
+ (
+ "https://en.wikipedia.org/wiki/C.E.O.",
+ "https://en.wikipedia.org/wiki/C.E.O",
+ ),
+ // Cases that should NOT be sanitized (periods are part of URL structure)
+ (
+ "https://example.com/v1.0/api",
+ "https://example.com/v1.0/api",
+ ),
+ ("https://192.168.1.1", "https://192.168.1.1"),
+ ("https://sub.domain.com", "https://sub.domain.com"),
+ ];
+
+ for (input, expected) in test_cases {
+ // Create a minimal terminal for testing
+ let term = Term::new(Config::default(), &TermSize::new(80, 24), VoidListener);
+
+ // Create a dummy match that spans the entire input
+ let start_point = AlacPoint::new(Line(0), Column(0));
+ let end_point = AlacPoint::new(Line(0), Column(input.len()));
+ let dummy_match = Match::new(start_point, end_point);
+
+ // This test should initially fail since we haven't implemented period sanitization yet
+ let (result, _) = sanitize_url_punctuation(input.to_string(), dummy_match, &term);
+ assert_eq!(result, expected, "Failed for input: {}", input);
+ }
+ }
+
#[test]
fn test_word_regex() {
re_test(