Improved URL matching
klonfish
created
URLs with trailing special characters (e.g. slash, minus, ...) should now be
matched correctly, even when followed by a non-URL character
Change summary
src/main/java/eu/siacs/conversations/ui/adapter/MessageAdapter.java | 19
src/main/java/eu/siacs/conversations/utils/Patterns.java | 2
2 files changed, 18 insertions(+), 3 deletions(-)
Detailed changes
@@ -123,7 +123,24 @@ public class MessageAdapter extends ArrayAdapter<Message> implements CopyTextVie
}
}
- private static final Linkify.MatchFilter WEBURL_MATCH_FILTER = (cs, start, end) -> start < 1 || (cs.charAt(start - 1) != '@' && cs.charAt(start - 1) != '.' && !cs.subSequence(Math.max(0, start - 3), start).equals("://"));
+ private static final Linkify.MatchFilter WEBURL_MATCH_FILTER = (cs, start, end) -> {
+ if (start > 0) {
+ if (cs.charAt(start - 1) == '@' || cs.charAt(start - 1) == '.'
+ || cs.subSequence(Math.max(0, start - 3), start).equals("://")) {
+ return false;
+ }
+ }
+
+ if (end < cs.length()) {
+ // Reject strings that were probably matched only because they contain a dot followed by
+ // some known TLD (see also comment for WORD_BOUNDARY in Patterns.java)
+ if (Character.isAlphabetic(cs.charAt(end-1)) && Character.isAlphabetic(cs.charAt(end))) {
+ return false;
+ }
+ }
+
+ return true;
+ };
private static final Linkify.MatchFilter XMPPURI_MATCH_FILTER = (s, start, end) -> {
XmppUri uri = new XmppUri(s.subSequence(start, end).toString());
@@ -353,7 +353,6 @@ public class Patterns {
+ "(?:" + PORT_NUMBER + ")?"
+ ")"
+ "(?:" + PATH_AND_QUERY + ")?"
- + WORD_BOUNDARY
+ ")";
/**
* Regular expression to match strings that start with a supported protocol. Rules for domain
@@ -367,7 +366,6 @@ public class Patterns {
+ "(?:" + PORT_NUMBER + ")?"
+ ")"
+ "(?:" + PATH_AND_QUERY + ")?"
- + WORD_BOUNDARY
+ ")";
/**
* Regular expression pattern to match IRIs. If a string starts with http(s):// the expression