Patterns: attempt at fixing complex xmpp URIs (typically xmpp:#mychan%irc.server@xmpp.server.tld?join) (#3115)

Romain DEP created

Change summary

src/main/java/eu/siacs/conversations/utils/Patterns.java | 9 +++++----
src/main/java/eu/siacs/conversations/utils/XmppUri.java  | 5 ++++-
2 files changed, 9 insertions(+), 5 deletions(-)

Detailed changes

src/main/java/eu/siacs/conversations/utils/Patterns.java 🔗

@@ -28,10 +28,11 @@ import java.util.regex.Pattern;
 public class Patterns {
 
     public static final Pattern XMPP_PATTERN = Pattern
-            .compile("xmpp\\:(?:(?:["
-                    + Patterns.GOOD_IRI_CHAR
-                    + "\\;\\/\\?\\@\\&\\=\\#\\~\\-\\.\\+\\!\\*\\'\\(\\)\\,\\_])"
-                    + "|(?:\\%[a-fA-F0-9]{2}))+");
+            .compile("xmpp:"
+                    + "(?:\\S+)" // any (one or more) non-whitespace prefix
+                    + "@(?:[." + Patterns.GOOD_IRI_CHAR + "])+" // domain part, added the dot
+                    + "(?:\\?[a-zA-Z]+)?" // optional action, can be any letter but no digit
+                    );
 
     /**
      *  Regular expression to match all IANA top-level domains.

src/main/java/eu/siacs/conversations/utils/XmppUri.java 🔗

@@ -91,7 +91,10 @@ public class XmppUri {
 			if (uri.getAuthority() != null) {
 				jid = uri.getAuthority();
 			} else {
-				String[] parts = uri.getSchemeSpecificPart().split("\\?");
+				// At this point, it is probably safe to assume that any instance of '%' or '#' in the
+				// passed URI comes from raw user input and can safely be (and arguably should be) encoded
+				Uri replaced = Uri.parse(lameUrlEncode(uri.toString()));
+				String[] parts = replaced.getSchemeSpecificPart().split("\\?");
 				if (parts.length > 0) {
 					jid = parts[0];
 				} else {