Parse IPv6 URIs (#3841).

Millesimus created

Change summary

src/main/java/eu/siacs/conversations/utils/Patterns.java | 39 +++++++++
1 file changed, 36 insertions(+), 3 deletions(-)

Detailed changes

src/main/java/eu/siacs/conversations/utils/Patterns.java 🔗

@@ -254,6 +254,39 @@ public class Patterns {
                     + "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1]"
                     + "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}"
                     + "|[1-9][0-9]|[0-9]))");
+
+    /**
+     * Matches an IPv6 address literal enclosed in square brackets, e.g. {@code [::1]}.
+     * The bracketed address may be written in any of these forms: full IPv6 addresses,
+     * zero compressed IPv6 addresses (section 2.2 of rfc5952),
+     * link-local IPv6 addresses with zone index (section 11 of rfc4007),
+     * IPv4-Embedded IPv6 Address (section 2 of rfc6052),
+     * IPv4-mapped IPv6 addresses (section 2.1 of rfc2765),
+     * IPv4-translated addresses (section 2.1 of rfc2765).
+     *
+     * Taken from https://stackoverflow.com/questions/53497/regular-expression-that-matches-valid-ipv6-addresses/17871737#17871737
+     */
+    public static final Pattern IP6_ADDRESS
+            = Pattern.compile(
+                    "\\[" + // literal opening bracket
+                    "(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|" + // eight hex groups, no "::"
+                            "([0-9a-fA-F]{1,4}:){1,7}:|" + // 1-7 hex groups followed by a trailing "::"
+                            "([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|" + // "::" between leading and trailing hex groups
+                            "([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|" +
+                            "([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|" +
+                            "([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|" +
+                            "([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|" +
+                            "[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|" +
+                            ":((:[0-9a-fA-F]{1,4}){1,7}|:)|" + // leading "::" followed by 1-7 hex groups, or "::" alone
+                            "fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|" + // "fe80:" prefix with an alphanumeric zone index after '%'
+                            "::(ffff(:0{1,4}){0,1}:){0,1}" + // "::", optionally "ffff:" or "ffff:0:", then a dotted quad
+                            "((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}" +
+                            "(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|" +
+                            "([0-9a-fA-F]{1,4}:){1,4}:" + // 1-4 hex groups, "::", then a dotted quad
+                            "((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}" +
+                            "(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))" +
+                    "\\]" // literal closing bracket
+    );
     /**
      * Valid UCS characters defined in RFC 3987. Excludes space characters.
      */
@@ -296,7 +329,7 @@ public class Patterns {
     private static final String TLD = "(" + PUNYCODE_TLD + "|" + "[" + TLD_CHAR + "]{2,63}" +")";
     private static final String HOST_NAME = "(" + IRI_LABEL + "\\.)+" + TLD;
     public static final Pattern DOMAIN_NAME
-            = Pattern.compile("(" + HOST_NAME + "|" + IP_ADDRESS + ")");
+            = Pattern.compile("(" + HOST_NAME + "|" + IP6_ADDRESS + "|" + IP_ADDRESS +")");
     private static final String PROTOCOL = "(?i:http|https|rtsp):\\/\\/";
     /* A word boundary or end of input.  This is to stop foo.sure from matching as foo.su */
     private static final String WORD_BOUNDARY = "(?:\\b|$|^)";
@@ -341,12 +374,12 @@ public class Patterns {
      * {@link #IP_ADDRESS}
      */
     private static final Pattern STRICT_DOMAIN_NAME
-            = Pattern.compile("(?:" + STRICT_HOST_NAME + "|" + IP_ADDRESS + ")");
+            = Pattern.compile("(?:" + STRICT_HOST_NAME + "|" + IP_ADDRESS + "|" + IP6_ADDRESS + ")");
     /**
      * Regular expression that matches domain names without a TLD
      */
     private static final String RELAXED_DOMAIN_NAME =
-            "(?:" + "(?:" + IRI_LABEL + "(?:\\.(?=\\S))" +"?)+" + "|" + IP_ADDRESS + ")";
+            "(?:" + "(?:" + IRI_LABEL + "(?:\\.(?=\\S))" +"?)+" + "|" + IP_ADDRESS + "|" + IP6_ADDRESS + ")";
     /**
      * Regular expression to match strings that do not start with a supported protocol. The TLDs
      * are expected to be one of the known TLDs.