Improved URL matching

klonfish created

URLs with trailing special characters (e.g. slash, minus, ...) should now be
matched correctly, even when followed by a non-URL character.

Change summary

src/main/java/eu/siacs/conversations/ui/adapter/MessageAdapter.java | 19 
src/main/java/eu/siacs/conversations/utils/Patterns.java            |  2 
2 files changed, 18 insertions(+), 3 deletions(-)

Detailed changes

src/main/java/eu/siacs/conversations/ui/adapter/MessageAdapter.java 🔗

@@ -123,7 +123,24 @@ public class MessageAdapter extends ArrayAdapter<Message> implements CopyTextVie
 		}
 	}
 
-	private static final Linkify.MatchFilter WEBURL_MATCH_FILTER = (cs, start, end) -> start < 1 || (cs.charAt(start - 1) != '@' && cs.charAt(start - 1) != '.' && !cs.subSequence(Math.max(0, start - 3), start).equals("://"));
+	private static final Linkify.MatchFilter WEBURL_MATCH_FILTER = (cs, start, end) -> {
+		if (start > 0) {
+			if (cs.charAt(start - 1) == '@' || cs.charAt(start - 1) == '.'
+					|| cs.subSequence(Math.max(0, start - 3), start).equals("://")) {
+				return false;
+			}
+		}
+
+		if (end < cs.length()) {
+			// Reject strings that were probably matched only because they contain a dot followed
+			// by some known TLD (see also the comment for WORD_BOUNDARY in Patterns.java)
+			if (Character.isAlphabetic(cs.charAt(end-1)) && Character.isAlphabetic(cs.charAt(end))) {
+				return false;
+			}
+		}
+
+		return true;
+	};
 
 	private static final Linkify.MatchFilter XMPPURI_MATCH_FILTER = (s, start, end) -> {
 		XmppUri uri = new XmppUri(s.subSequence(start, end).toString());

src/main/java/eu/siacs/conversations/utils/Patterns.java 🔗

@@ -353,7 +353,6 @@ public class Patterns {
             + "(?:" + PORT_NUMBER + ")?"
             + ")"
             + "(?:" + PATH_AND_QUERY + ")?"
-            + WORD_BOUNDARY
             + ")";
     /**
      * Regular expression to match strings that start with a supported protocol. Rules for domain
@@ -367,7 +366,6 @@ public class Patterns {
             + "(?:" + PORT_NUMBER + ")?"
             + ")"
             + "(?:" + PATH_AND_QUERY + ")?"
-            + WORD_BOUNDARY
             + ")";
     /**
      * Regular expression pattern to match IRIs. If a string starts with http(s):// the expression