Patterns.java

  1/*
  2 * Copyright (C) 2007 The Android Open Source Project
  3 *
  4 * Licensed under the Apache License, Version 2.0 (the "License");
  5 * you may not use this file except in compliance with the License.
  6 * You may obtain a copy of the License at
  7 *
  8 *      http://www.apache.org/licenses/LICENSE-2.0
  9 *
 10 * Unless required by applicable law or agreed to in writing, software
 11 * distributed under the License is distributed on an "AS IS" BASIS,
 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 * See the License for the specific language governing permissions and
 14 * limitations under the License.
 15 *
 16 *
 17 * Download latest version here:
 18 * https://android.googlesource.com/platform/frameworks/base.git/+/master/core/java/android/util/Patterns.java
 19 *
 20 *
 21 */
 22package eu.siacs.conversations.utils;
 23import java.util.regex.Matcher;
 24import java.util.regex.Pattern;
 25/**
 26 * Commonly used regular expression patterns.
 27 */
 28public class Patterns {
 29
 30    public static final Pattern XMPP_PATTERN = Pattern
 31            .compile("xmpp\\:(?:(?:["
 32                    + Patterns.GOOD_IRI_CHAR
 33                    + "\\;\\/\\?\\@\\&\\=\\#\\~\\-\\.\\+\\!\\*\\'\\(\\)\\,\\_])"
 34                    + "|(?:\\%[a-fA-F0-9]{2}))+");
 35
 36    public static final Pattern BITCOIN_URI = Pattern
 37            .compile("bitcoin\\:(?:[13][a-km-zA-HJ-NP-Z1-9]{25,34}|[bB][cC]1[pPqQ][a-zA-Z0-9]{38,58})(?:\\?(?:(?:["
 38                    + Patterns.GOOD_IRI_CHAR
 39                    + "\\;\\/\\?\\@\\&\\=\\#\\~\\-\\.\\+\\!\\*\\'\\(\\)\\,\\_])"
 40                    + "|(?:\\%[a-fA-F0-9]{2}))+)?");
 41
 42    public static final Pattern BITCOINCASH_URI = Pattern
 43            .compile("bitcoincash\\:(?:[13][a-km-zA-HJ-NP-Z1-9]{33}|[qp][a-z0-9]{41})(?:\\?(?:(?:["
 44                    + Patterns.GOOD_IRI_CHAR
 45                    + "\\;\\/\\?\\@\\&\\=\\#\\~\\-\\.\\+\\!\\*\\'\\(\\)\\,\\_])"
 46                    + "|(?:\\%[a-fA-F0-9]{2}))+)?");
 47
    /**
     *  Regular expression to match all IANA top-level domains.
     *  List accurate as of 2011/07/18.  List taken from:
     *  http://data.iana.org/TLD/tlds-alpha-by-domain.txt
     *  This pattern is auto-generated by frameworks/ex/common/tools/make-iana-tld-pattern.py
     *
     *  <p>The alternatives are grouped by leading letter and use capturing groups; see
     *  {@link #TOP_LEVEL_DOMAIN_STR_FOR_WEB_URL} for the same list with non-capturing groups.
     *
     *  @deprecated Due to the recent proliferation of gTLDs, this API is
     *  expected to become out-of-date very quickly. Therefore it is now
     *  deprecated.
     */
    @Deprecated
    public static final String TOP_LEVEL_DOMAIN_STR =
            "((aero|arpa|asia|a[cdefgilmnoqrstuwxz])"
                    + "|(biz|b[abdefghijmnorstvwyz])"
                    + "|(cat|com|coop|c[acdfghiklmnoruvxyz])"
                    + "|d[ejkmoz]"
                    + "|(edu|e[cegrstu])"
                    + "|f[ijkmor]"
                    + "|(gov|g[abdefghilmnpqrstuwy])"
                    + "|h[kmnrtu]"
                    + "|(info|int|i[delmnoqrst])"
                    + "|(jobs|j[emop])"
                    + "|k[eghimnprwyz]"
                    + "|l[abcikrstuvy]"
                    + "|(mil|mobi|museum|m[acdeghklmnopqrstuvwxyz])"
                    + "|(name|net|n[acefgilopruz])"
                    + "|(org|om)"
                    + "|(pro|p[aefghklmnrstwy])"
                    + "|qa"
                    + "|r[eosuw]"
                    + "|s[abcdeghijklmnortuvyz]"
                    + "|(tel|travel|t[cdfghjklmnoprtvwz])"
                    + "|u[agksyz]"
                    + "|v[aceginu]"
                    + "|w[fs]"
                    // Internationalized TLDs (Unicode form), their xn-- punycode forms, and xxx.
                    + "|(\u03b4\u03bf\u03ba\u03b9\u03bc\u03ae|\u0438\u0441\u043f\u044b\u0442\u0430\u043d\u0438\u0435|\u0440\u0444|\u0441\u0440\u0431|\u05d8\u05e2\u05e1\u05d8|\u0622\u0632\u0645\u0627\u06cc\u0634\u06cc|\u0625\u062e\u062a\u0628\u0627\u0631|\u0627\u0644\u0627\u0631\u062f\u0646|\u0627\u0644\u062c\u0632\u0627\u0626\u0631|\u0627\u0644\u0633\u0639\u0648\u062f\u064a\u0629|\u0627\u0644\u0645\u063a\u0631\u0628|\u0627\u0645\u0627\u0631\u0627\u062a|\u0628\u06be\u0627\u0631\u062a|\u062a\u0648\u0646\u0633|\u0633\u0648\u0631\u064a\u0629|\u0641\u0644\u0633\u0637\u064a\u0646|\u0642\u0637\u0631|\u0645\u0635\u0631|\u092a\u0930\u0940\u0915\u094d\u0937\u093e|\u092d\u093e\u0930\u0924|\u09ad\u09be\u09b0\u09a4|\u0a2d\u0a3e\u0a30\u0a24|\u0aad\u0abe\u0ab0\u0aa4|\u0b87\u0ba8\u0bcd\u0ba4\u0bbf\u0baf\u0bbe|\u0b87\u0bb2\u0b99\u0bcd\u0b95\u0bc8|\u0b9a\u0bbf\u0b99\u0bcd\u0b95\u0baa\u0bcd\u0baa\u0bc2\u0bb0\u0bcd|\u0baa\u0bb0\u0bbf\u0b9f\u0bcd\u0b9a\u0bc8|\u0c2d\u0c3e\u0c30\u0c24\u0c4d|\u0dbd\u0d82\u0d9a\u0dcf|\u0e44\u0e17\u0e22|\u30c6\u30b9\u30c8|\u4e2d\u56fd|\u4e2d\u570b|\u53f0\u6e7e|\u53f0\u7063|\u65b0\u52a0\u5761|\u6d4b\u8bd5|\u6e2c\u8a66|\u9999\u6e2f|\ud14c\uc2a4\ud2b8|\ud55c\uad6d|xn\\-\\-0zwm56d|xn\\-\\-11b5bs3a9aj6g|xn\\-\\-3e0b707e|xn\\-\\-45brj9c|xn\\-\\-80akhbyknj4f|xn\\-\\-90a3ac|xn\\-\\-9t4b11yi5a|xn\\-\\-clchc0ea0b2g2a9gcd|xn\\-\\-deba0ad|xn\\-\\-fiqs8s|xn\\-\\-fiqz9s|xn\\-\\-fpcrj9c3d|xn\\-\\-fzc2c9e2c|xn\\-\\-g6w251d|xn\\-\\-gecrj9c|xn\\-\\-h2brj9c|xn\\-\\-hgbk6aj7f53bba|xn\\-\\-hlcj6aya9esc7a|xn\\-\\-j6w193g|xn\\-\\-jxalpdlp|xn\\-\\-kgbechtv|xn\\-\\-kprw13d|xn\\-\\-kpry57d|xn\\-\\-lgbbat1ad8j|xn\\-\\-mgbaam7a8h|xn\\-\\-mgbayh7gpa|xn\\-\\-mgbbh1a71e|xn\\-\\-mgbc0a9azcg|xn\\-\\-mgberp4a5d4ar|xn\\-\\-o3cw4h|xn\\-\\-ogbpf8fl|xn\\-\\-p1ai|xn\\-\\-pgbs0dh|xn\\-\\-s9brj9c|xn\\-\\-wgbh1c|xn\\-\\-wgbl6a|xn\\-\\-xkc2al3hye2a|xn\\-\\-xkc2dl3a5ee0h|xn\\-\\-yfro4i67o|xn\\-\\-ygbi2ammx|xn\\-\\-zckzah|xxx)"
                    + "|y[et]"
                    + "|z[amw])";
 86    /**
 87     *  Regular expression pattern to match all IANA top-level domains.
 88     *  @deprecated This API is deprecated. See {@link #TOP_LEVEL_DOMAIN_STR}.
 89     */
 90    @Deprecated
 91    public static final Pattern TOP_LEVEL_DOMAIN =
 92            Pattern.compile(TOP_LEVEL_DOMAIN_STR);
    /**
     *  Regular expression to match all IANA top-level domains for WEB_URL.
     *  List accurate as of 2011/07/18.  List taken from:
     *  http://data.iana.org/TLD/tlds-alpha-by-domain.txt
     *  This pattern is auto-generated by frameworks/ex/common/tools/make-iana-tld-pattern.py
     *
     *  <p>Unlike {@link #TOP_LEVEL_DOMAIN_STR}, every group here is non-capturing, so embedding
     *  this string in a larger expression does not shift that expression's group numbers.
     *
     *  @deprecated This API is deprecated. See {@link #TOP_LEVEL_DOMAIN_STR}.
     */
    @Deprecated
    public static final String TOP_LEVEL_DOMAIN_STR_FOR_WEB_URL =
            "(?:"
                    + "(?:aero|arpa|asia|a[cdefgilmnoqrstuwxz])"
                    + "|(?:biz|b[abdefghijmnorstvwyz])"
                    + "|(?:cat|com|coop|c[acdfghiklmnoruvxyz])"
                    + "|d[ejkmoz]"
                    + "|(?:edu|e[cegrstu])"
                    + "|f[ijkmor]"
                    + "|(?:gov|g[abdefghilmnpqrstuwy])"
                    + "|h[kmnrtu]"
                    + "|(?:info|int|i[delmnoqrst])"
                    + "|(?:jobs|j[emop])"
                    + "|k[eghimnprwyz]"
                    + "|l[abcikrstuvy]"
                    + "|(?:mil|mobi|museum|m[acdeghklmnopqrstuvwxyz])"
                    + "|(?:name|net|n[acefgilopruz])"
                    + "|(?:org|om)"
                    + "|(?:pro|p[aefghklmnrstwy])"
                    + "|qa"
                    + "|r[eosuw]"
                    + "|s[abcdeghijklmnortuvyz]"
                    + "|(?:tel|travel|t[cdfghjklmnoprtvwz])"
                    + "|u[agksyz]"
                    + "|v[aceginu]"
                    + "|w[fs]"
                    // Internationalized TLDs (Unicode form), their xn-- punycode forms, and xxx.
                    + "|(?:\u03b4\u03bf\u03ba\u03b9\u03bc\u03ae|\u0438\u0441\u043f\u044b\u0442\u0430\u043d\u0438\u0435|\u0440\u0444|\u0441\u0440\u0431|\u05d8\u05e2\u05e1\u05d8|\u0622\u0632\u0645\u0627\u06cc\u0634\u06cc|\u0625\u062e\u062a\u0628\u0627\u0631|\u0627\u0644\u0627\u0631\u062f\u0646|\u0627\u0644\u062c\u0632\u0627\u0626\u0631|\u0627\u0644\u0633\u0639\u0648\u062f\u064a\u0629|\u0627\u0644\u0645\u063a\u0631\u0628|\u0627\u0645\u0627\u0631\u0627\u062a|\u0628\u06be\u0627\u0631\u062a|\u062a\u0648\u0646\u0633|\u0633\u0648\u0631\u064a\u0629|\u0641\u0644\u0633\u0637\u064a\u0646|\u0642\u0637\u0631|\u0645\u0635\u0631|\u092a\u0930\u0940\u0915\u094d\u0937\u093e|\u092d\u093e\u0930\u0924|\u09ad\u09be\u09b0\u09a4|\u0a2d\u0a3e\u0a30\u0a24|\u0aad\u0abe\u0ab0\u0aa4|\u0b87\u0ba8\u0bcd\u0ba4\u0bbf\u0baf\u0bbe|\u0b87\u0bb2\u0b99\u0bcd\u0b95\u0bc8|\u0b9a\u0bbf\u0b99\u0bcd\u0b95\u0baa\u0bcd\u0baa\u0bc2\u0bb0\u0bcd|\u0baa\u0bb0\u0bbf\u0b9f\u0bcd\u0b9a\u0bc8|\u0c2d\u0c3e\u0c30\u0c24\u0c4d|\u0dbd\u0d82\u0d9a\u0dcf|\u0e44\u0e17\u0e22|\u30c6\u30b9\u30c8|\u4e2d\u56fd|\u4e2d\u570b|\u53f0\u6e7e|\u53f0\u7063|\u65b0\u52a0\u5761|\u6d4b\u8bd5|\u6e2c\u8a66|\u9999\u6e2f|\ud14c\uc2a4\ud2b8|\ud55c\uad6d|xn\\-\\-0zwm56d|xn\\-\\-11b5bs3a9aj6g|xn\\-\\-3e0b707e|xn\\-\\-45brj9c|xn\\-\\-80akhbyknj4f|xn\\-\\-90a3ac|xn\\-\\-9t4b11yi5a|xn\\-\\-clchc0ea0b2g2a9gcd|xn\\-\\-deba0ad|xn\\-\\-fiqs8s|xn\\-\\-fiqz9s|xn\\-\\-fpcrj9c3d|xn\\-\\-fzc2c9e2c|xn\\-\\-g6w251d|xn\\-\\-gecrj9c|xn\\-\\-h2brj9c|xn\\-\\-hgbk6aj7f53bba|xn\\-\\-hlcj6aya9esc7a|xn\\-\\-j6w193g|xn\\-\\-jxalpdlp|xn\\-\\-kgbechtv|xn\\-\\-kprw13d|xn\\-\\-kpry57d|xn\\-\\-lgbbat1ad8j|xn\\-\\-mgbaam7a8h|xn\\-\\-mgbayh7gpa|xn\\-\\-mgbbh1a71e|xn\\-\\-mgbc0a9azcg|xn\\-\\-mgberp4a5d4ar|xn\\-\\-o3cw4h|xn\\-\\-ogbpf8fl|xn\\-\\-p1ai|xn\\-\\-pgbs0dh|xn\\-\\-s9brj9c|xn\\-\\-wgbh1c|xn\\-\\-wgbl6a|xn\\-\\-xkc2al3hye2a|xn\\-\\-xkc2dl3a5ee0h|xn\\-\\-yfro4i67o|xn\\-\\-ygbi2ammx|xn\\-\\-zckzah|xxx)"
                    + "|y[et]"
                    + "|z[amw]))";
    /**
     *  Regular expression to match all IANA top-level domains.
     *
     *  List accurate as of 2015/11/24.  List taken from:
     *  http://data.iana.org/TLD/tlds-alpha-by-domain.txt
     *  This pattern is auto-generated by frameworks/ex/common/tools/make-iana-tld-pattern.py
     *
     *  <p>The expression is one non-capturing alternation. Alternatives are grouped by leading
     *  letter, each group ending with a character class for the two-letter codes; a further
     *  group holds internationalized TLDs in Unicode form, their {@code xn--} punycode forms,
     *  and the remaining names starting with {@code x}.
     *
     *  @hide
     */
    static final String IANA_TOP_LEVEL_DOMAINS =
            "(?:"
                    + "(?:aaa|aarp|abb|abbott|abogado|academy|accenture|accountant|accountants|aco|active"
                    + "|actor|ads|adult|aeg|aero|afl|agency|aig|airforce|airtel|allfinanz|alsace|amica|amsterdam"
                    + "|android|apartments|app|apple|aquarelle|aramco|archi|army|arpa|arte|asia|associates"
                    + "|attorney|auction|audio|auto|autos|axa|azure|a[cdefgilmoqrstuwxz])"
                    + "|(?:band|bank|bar|barcelona|barclaycard|barclays|bargains|bauhaus|bayern|bbc|bbva"
                    + "|bcn|beats|beer|bentley|berlin|best|bet|bharti|bible|bid|bike|bing|bingo|bio|biz|black"
                    + "|blackfriday|bloomberg|blue|bms|bmw|bnl|bnpparibas|boats|bom|bond|boo|boots|boutique"
                    + "|bradesco|bridgestone|broadway|broker|brother|brussels|budapest|build|builders|business"
                    + "|buzz|bzh|b[abdefghijmnorstvwyz])"
                    + "|(?:cab|cafe|cal|camera|camp|cancerresearch|canon|capetown|capital|car|caravan|cards"
                    + "|care|career|careers|cars|cartier|casa|cash|casino|cat|catering|cba|cbn|ceb|center|ceo"
                    + "|cern|cfa|cfd|chanel|channel|chat|cheap|chloe|christmas|chrome|church|cipriani|cisco"
                    + "|citic|city|cityeats|claims|cleaning|click|clinic|clothing|cloud|club|clubmed|coach"
                    + "|codes|coffee|college|cologne|com|commbank|community|company|computer|comsec|condos"
                    + "|construction|consulting|contractors|cooking|cool|coop|corsica|country|coupons|courses"
                    + "|credit|creditcard|creditunion|cricket|crown|crs|cruises|csc|cuisinella|cymru|cyou|c[acdfghiklmnoruvwxyz])"
                    + "|(?:dabur|dad|dance|date|dating|datsun|day|dclk|deals|degree|delivery|dell|delta"
                    + "|democrat|dental|dentist|desi|design|dev|diamonds|diet|digital|direct|directory|discount"
                    + "|dnp|docs|dog|doha|domains|doosan|download|drive|durban|dvag|d[ejkmoz])"
                    + "|(?:earth|eat|edu|education|email|emerck|energy|engineer|engineering|enterprises"
                    + "|epson|equipment|erni|esq|estate|eurovision|eus|events|everbank|exchange|expert|exposed"
                    + "|express|e[cegrstu])"
                    + "|(?:fage|fail|fairwinds|faith|family|fan|fans|farm|fashion|feedback|ferrero|film"
                    + "|final|finance|financial|firmdale|fish|fishing|fit|fitness|flights|florist|flowers|flsmidth"
                    + "|fly|foo|football|forex|forsale|forum|foundation|frl|frogans|fund|furniture|futbol|fyi"
                    + "|f[ijkmor])"
                    + "|(?:gal|gallery|game|garden|gbiz|gdn|gea|gent|genting|ggee|gift|gifts|gives|giving"
                    + "|glass|gle|global|globo|gmail|gmo|gmx|gold|goldpoint|golf|goo|goog|google|gop|gov|grainger"
                    + "|graphics|gratis|green|gripe|group|gucci|guge|guide|guitars|guru|g[abdefghilmnpqrstuwy])"
                    + "|(?:hamburg|hangout|haus|healthcare|help|here|hermes|hiphop|hitachi|hiv|hockey|holdings"
                    + "|holiday|homedepot|homes|honda|horse|host|hosting|hoteles|hotmail|house|how|hsbc|hyundai"
                    + "|h[kmnrtu])"
                    + "|(?:ibm|icbc|ice|icu|ifm|iinet|immo|immobilien|industries|infiniti|info|ing|ink|institute"
                    + "|insure|int|international|investments|ipiranga|irish|ist|istanbul|itau|iwc|i[delmnoqrst])"
                    + "|(?:jaguar|java|jcb|jetzt|jewelry|jlc|jll|jobs|joburg|jprs|juegos|j[emop])"
                    + "|(?:kaufen|kddi|kia|kim|kinder|kitchen|kiwi|koeln|komatsu|krd|kred|kyoto|k[eghimnprwyz])"
                    + "|(?:lacaixa|lancaster|land|landrover|lasalle|lat|latrobe|law|lawyer|lds|lease|leclerc"
                    + "|legal|lexus|lgbt|liaison|lidl|life|lifestyle|lighting|limited|limo|linde|link|live"
                    + "|lixil|loan|loans|lol|london|lotte|lotto|love|ltd|ltda|lupin|luxe|luxury|l[abcikrstuvy])"
                    + "|(?:madrid|maif|maison|man|management|mango|market|marketing|markets|marriott|mba"
                    + "|media|meet|melbourne|meme|memorial|men|menu|meo|miami|microsoft|mil|mini|mma|mobi|moda"
                    + "|moe|moi|mom|monash|money|montblanc|mormon|mortgage|moscow|motorcycles|mov|movie|movistar"
                    + "|mtn|mtpc|mtr|museum|mutuelle|m[acdeghklmnopqrstuvwxyz])"
                    + "|(?:nadex|nagoya|name|navy|nec|net|netbank|network|neustar|new|news|nexus|ngo|nhk"
                    + "|nico|ninja|nissan|nokia|nra|nrw|ntt|nyc|n[acefgilopruz])"
                    + "|(?:obi|office|okinawa|omega|one|ong|onl|online|ooo|oracle|orange|org|organic|osaka"
                    + "|otsuka|ovh|om)"
                    + "|(?:page|panerai|paris|partners|parts|party|pet|pharmacy|philips|photo|photography"
                    + "|photos|physio|piaget|pics|pictet|pictures|ping|pink|pizza|place|play|playstation|plumbing"
                    + "|plus|pohl|poker|porn|post|praxi|press|pro|prod|productions|prof|properties|property"
                    + "|protection|pub|p[aefghklmnrstwy])"
                    + "|(?:qpon|quebec|qa)"
                    + "|(?:racing|realtor|realty|recipes|red|redstone|rehab|reise|reisen|reit|ren|rent|rentals"
                    + "|repair|report|republican|rest|restaurant|review|reviews|rich|ricoh|rio|rip|rocher|rocks"
                    + "|rodeo|rsvp|ruhr|run|rwe|ryukyu|r[eosuw])"
                    + "|(?:saarland|sakura|sale|samsung|sandvik|sandvikcoromant|sanofi|sap|sapo|sarl|saxo"
                    + "|sbs|sca|scb|schmidt|scholarships|school|schule|schwarz|science|scor|scot|seat|security"
                    + "|seek|sener|services|seven|sew|sex|sexy|shiksha|shoes|show|shriram|singles|site|ski"
                    + "|sky|skype|sncf|soccer|social|software|sohu|solar|solutions|sony|soy|space|spiegel|spreadbetting"
                    + "|srl|stada|starhub|statoil|stc|stcgroup|stockholm|studio|study|style|sucks|supplies"
                    + "|supply|support|surf|surgery|suzuki|swatch|swiss|sydney|systems|s[abcdeghijklmnortuvxyz])"
                    + "|(?:tab|taipei|tatamotors|tatar|tattoo|tax|taxi|team|tech|technology|tel|telefonica"
                    + "|temasek|tennis|thd|theater|theatre|tickets|tienda|tips|tires|tirol|today|tokyo|tools"
                    + "|top|toray|toshiba|tours|town|toyota|toys|trade|trading|training|travel|trust|tui|t[cdfghjklmnortvwz])"
                    + "|(?:ubs|university|uno|uol|u[agksyz])"
                    + "|(?:vacations|vana|vegas|ventures|versicherung|vet|viajes|video|villas|vin|virgin"
                    + "|vision|vista|vistaprint|viva|vlaanderen|vodka|vote|voting|voto|voyage|v[aceginu])"
                    + "|(?:wales|walter|wang|watch|webcam|website|wed|wedding|weir|whoswho|wien|wiki|williamhill"
                    + "|win|windows|wine|wme|work|works|world|wtc|wtf|w[fs])"
                    // Internationalized TLDs in Unicode form, followed by the names starting with x
                    // (including the xn-- punycode forms).
                    + "|(?:\u03b5\u03bb|\u0431\u0435\u043b|\u0434\u0435\u0442\u0438|\u043a\u043e\u043c|\u043c\u043a\u0434"
                    + "|\u043c\u043e\u043d|\u043c\u043e\u0441\u043a\u0432\u0430|\u043e\u043d\u043b\u0430\u0439\u043d"
                    + "|\u043e\u0440\u0433|\u0440\u0443\u0441|\u0440\u0444|\u0441\u0430\u0439\u0442|\u0441\u0440\u0431"
                    + "|\u0443\u043a\u0440|\u049b\u0430\u0437|\u0570\u0561\u0575|\u05e7\u05d5\u05dd|\u0627\u0631\u0627\u0645\u0643\u0648"
                    + "|\u0627\u0644\u0627\u0631\u062f\u0646|\u0627\u0644\u062c\u0632\u0627\u0626\u0631|\u0627\u0644\u0633\u0639\u0648\u062f\u064a\u0629"
                    + "|\u0627\u0644\u0645\u063a\u0631\u0628|\u0627\u0645\u0627\u0631\u0627\u062a|\u0627\u06cc\u0631\u0627\u0646"
                    + "|\u0628\u0627\u0632\u0627\u0631|\u0628\u06be\u0627\u0631\u062a|\u062a\u0648\u0646\u0633"
                    + "|\u0633\u0648\u062f\u0627\u0646|\u0633\u0648\u0631\u064a\u0629|\u0634\u0628\u0643\u0629"
                    + "|\u0639\u0631\u0627\u0642|\u0639\u0645\u0627\u0646|\u0641\u0644\u0633\u0637\u064a\u0646"
                    + "|\u0642\u0637\u0631|\u0643\u0648\u0645|\u0645\u0635\u0631|\u0645\u0644\u064a\u0633\u064a\u0627"
                    + "|\u0645\u0648\u0642\u0639|\u0915\u0949\u092e|\u0928\u0947\u091f|\u092d\u093e\u0930\u0924"
                    + "|\u0938\u0902\u0917\u0920\u0928|\u09ad\u09be\u09b0\u09a4|\u0a2d\u0a3e\u0a30\u0a24|\u0aad\u0abe\u0ab0\u0aa4"
                    + "|\u0b87\u0ba8\u0bcd\u0ba4\u0bbf\u0baf\u0bbe|\u0b87\u0bb2\u0b99\u0bcd\u0b95\u0bc8|\u0b9a\u0bbf\u0b99\u0bcd\u0b95\u0baa\u0bcd\u0baa\u0bc2\u0bb0\u0bcd"
                    + "|\u0c2d\u0c3e\u0c30\u0c24\u0c4d|\u0dbd\u0d82\u0d9a\u0dcf|\u0e04\u0e2d\u0e21|\u0e44\u0e17\u0e22"
                    + "|\u10d2\u10d4|\u307f\u3093\u306a|\u30b0\u30fc\u30b0\u30eb|\u30b3\u30e0|\u4e16\u754c"
                    + "|\u4e2d\u4fe1|\u4e2d\u56fd|\u4e2d\u570b|\u4e2d\u6587\u7f51|\u4f01\u4e1a|\u4f5b\u5c71"
                    + "|\u4fe1\u606f|\u5065\u5eb7|\u516b\u5366|\u516c\u53f8|\u516c\u76ca|\u53f0\u6e7e|\u53f0\u7063"
                    + "|\u5546\u57ce|\u5546\u5e97|\u5546\u6807|\u5728\u7ebf|\u5927\u62ff|\u5a31\u4e50|\u5de5\u884c"
                    + "|\u5e7f\u4e1c|\u6148\u5584|\u6211\u7231\u4f60|\u624b\u673a|\u653f\u52a1|\u653f\u5e9c"
                    + "|\u65b0\u52a0\u5761|\u65b0\u95fb|\u65f6\u5c1a|\u673a\u6784|\u6de1\u9a6c\u9521|\u6e38\u620f"
                    + "|\u70b9\u770b|\u79fb\u52a8|\u7ec4\u7ec7\u673a\u6784|\u7f51\u5740|\u7f51\u5e97|\u7f51\u7edc"
                    + "|\u8c37\u6b4c|\u96c6\u56e2|\u98de\u5229\u6d66|\u9910\u5385|\u9999\u6e2f|\ub2f7\ub137"
                    + "|\ub2f7\ucef4|\uc0bc\uc131|\ud55c\uad6d|xbox"
                    + "|xerox|xin|xn\\-\\-11b4c3d|xn\\-\\-1qqw23a|xn\\-\\-30rr7y|xn\\-\\-3bst00m|xn\\-\\-3ds443g"
                    + "|xn\\-\\-3e0b707e|xn\\-\\-3pxu8k|xn\\-\\-42c2d9a|xn\\-\\-45brj9c|xn\\-\\-45q11c|xn\\-\\-4gbrim"
                    + "|xn\\-\\-55qw42g|xn\\-\\-55qx5d|xn\\-\\-6frz82g|xn\\-\\-6qq986b3xl|xn\\-\\-80adxhks"
                    + "|xn\\-\\-80ao21a|xn\\-\\-80asehdb|xn\\-\\-80aswg|xn\\-\\-90a3ac|xn\\-\\-90ais|xn\\-\\-9dbq2a"
                    + "|xn\\-\\-9et52u|xn\\-\\-b4w605ferd|xn\\-\\-c1avg|xn\\-\\-c2br7g|xn\\-\\-cg4bki|xn\\-\\-clchc0ea0b2g2a9gcd"
                    + "|xn\\-\\-czr694b|xn\\-\\-czrs0t|xn\\-\\-czru2d|xn\\-\\-d1acj3b|xn\\-\\-d1alf|xn\\-\\-efvy88h"
                    + "|xn\\-\\-estv75g|xn\\-\\-fhbei|xn\\-\\-fiq228c5hs|xn\\-\\-fiq64b|xn\\-\\-fiqs8s|xn\\-\\-fiqz9s"
                    + "|xn\\-\\-fjq720a|xn\\-\\-flw351e|xn\\-\\-fpcrj9c3d|xn\\-\\-fzc2c9e2c|xn\\-\\-gecrj9c"
                    + "|xn\\-\\-h2brj9c|xn\\-\\-hxt814e|xn\\-\\-i1b6b1a6a2e|xn\\-\\-imr513n|xn\\-\\-io0a7i"
                    + "|xn\\-\\-j1aef|xn\\-\\-j1amh|xn\\-\\-j6w193g|xn\\-\\-kcrx77d1x4a|xn\\-\\-kprw13d|xn\\-\\-kpry57d"
                    + "|xn\\-\\-kput3i|xn\\-\\-l1acc|xn\\-\\-lgbbat1ad8j|xn\\-\\-mgb9awbf|xn\\-\\-mgba3a3ejt"
                    + "|xn\\-\\-mgba3a4f16a|xn\\-\\-mgbaam7a8h|xn\\-\\-mgbab2bd|xn\\-\\-mgbayh7gpa|xn\\-\\-mgbbh1a71e"
                    + "|xn\\-\\-mgbc0a9azcg|xn\\-\\-mgberp4a5d4ar|xn\\-\\-mgbpl2fh|xn\\-\\-mgbtx2b|xn\\-\\-mgbx4cd0ab"
                    + "|xn\\-\\-mk1bu44c|xn\\-\\-mxtq1m|xn\\-\\-ngbc5azd|xn\\-\\-node|xn\\-\\-nqv7f|xn\\-\\-nqv7fs00ema"
                    + "|xn\\-\\-nyqy26a|xn\\-\\-o3cw4h|xn\\-\\-ogbpf8fl|xn\\-\\-p1acf|xn\\-\\-p1ai|xn\\-\\-pgbs0dh"
                    + "|xn\\-\\-pssy2u|xn\\-\\-q9jyb4c|xn\\-\\-qcka1pmc|xn\\-\\-qxam|xn\\-\\-rhqv96g|xn\\-\\-s9brj9c"
                    + "|xn\\-\\-ses554g|xn\\-\\-t60b56a|xn\\-\\-tckwe|xn\\-\\-unup4y|xn\\-\\-vermgensberater\\-ctb"
                    + "|xn\\-\\-vermgensberatung\\-pwb|xn\\-\\-vhquv|xn\\-\\-vuq861b|xn\\-\\-wgbh1c|xn\\-\\-wgbl6a"
                    + "|xn\\-\\-xhq521b|xn\\-\\-xkc2al3hye2a|xn\\-\\-xkc2dl3a5ee0h|xn\\-\\-y9a3aq|xn\\-\\-yfro4i67o"
                    + "|xn\\-\\-ygbi2ammx|xn\\-\\-zfr164b|xperia|xxx|xyz)"
                    + "|(?:yachts|yamaxun|yandex|yodobashi|yoga|yokohama|youtube|y[et])"
                    + "|(?:zara|zip|zone|zuerich|z[amw]))";
255    /**
256     * Kept for backward compatibility reasons.
257     *
258     * @deprecated Deprecated since it does not include all IRI characters defined in RFC 3987
259     */
260    @Deprecated
261    public static final String GOOD_IRI_CHAR =
262            "a-zA-Z0-9\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF";
263    public static final Pattern IP_ADDRESS
264            = Pattern.compile(
265            "((25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(25[0-5]|2[0-4]"
266                    + "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1]"
267                    + "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}"
268                    + "|[1-9][0-9]|[0-9]))");
269
270    /**
271     * IPv6 address matcher for
272     * IPv6 addresses
273     * zero compressed IPv6 addresses (section 2.2 of rfc5952)
274     * link-local IPv6 addresses with zone index (section 11 of rfc4007)
275     * IPv4-Embedded IPv6 Address (section 2 of rfc6052)
276     * IPv4-mapped IPv6 addresses (section 2.1 of rfc2765)
277     * IPv4-translated addresses (section 2.1 of rfc2765)
278     *
279     * Taken from https://stackoverflow.com/questions/53497/regular-expression-that-matches-valid-ipv6-addresses/17871737#17871737
280     */
281    public static final Pattern IP6_ADDRESS
282            = Pattern.compile(
283                    "\\[" +
284                    "(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|" +
285                            "([0-9a-fA-F]{1,4}:){1,7}:|" +
286                            "([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|" +
287                            "([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|" +
288                            "([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|" +
289                            "([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|" +
290                            "([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|" +
291                            "[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|" +
292                            ":((:[0-9a-fA-F]{1,4}){1,7}|:)|" +
293                            "fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|" +
294                            "::(ffff(:0{1,4}){0,1}:){0,1}" +
295                            "((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}" +
296                            "(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|" +
297                            "([0-9a-fA-F]{1,4}:){1,4}:" +
298                            "((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}" +
299                            "(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))" +
300                    "\\]"
301    );
    /**
     * Valid UCS characters defined in RFC 3987. Excludes space characters.
     *
     * <p>The value is a complete bracketed character class, not just a class body: the union
     * of the listed ranges intersected with a negated class of space characters.
     */
    private static final String UCS_CHAR = "[" +
            // Ranges within the Basic Multilingual Plane.
            "\u00A0-\uD7FF" +
            "\uF900-\uFDCF" +
            "\uFDF0-\uFFEF" +
            // Supplementary code points, written as surrogate pairs: one range per line,
            // starting at U+10000 and ending at U+EFFFD (the last range starts at U+E1000).
            "\uD800\uDC00-\uD83F\uDFFD" +
            "\uD840\uDC00-\uD87F\uDFFD" +
            "\uD880\uDC00-\uD8BF\uDFFD" +
            "\uD8C0\uDC00-\uD8FF\uDFFD" +
            "\uD900\uDC00-\uD93F\uDFFD" +
            "\uD940\uDC00-\uD97F\uDFFD" +
            "\uD980\uDC00-\uD9BF\uDFFD" +
            "\uD9C0\uDC00-\uD9FF\uDFFD" +
            "\uDA00\uDC00-\uDA3F\uDFFD" +
            "\uDA40\uDC00-\uDA7F\uDFFD" +
            "\uDA80\uDC00-\uDABF\uDFFD" +
            "\uDAC0\uDC00-\uDAFF\uDFFD" +
            "\uDB00\uDC00-\uDB3F\uDFFD" +
            "\uDB44\uDC00-\uDB7F\uDFFD" +
            // Intersection (&&) with a negated class removes U+00A0, U+2000..U+200A, U+2028,
            // U+2029, U+202F and U+3000 from the set above.
            "&&[^\u00A0[\u2000-\u200A]\u2028\u2029\u202F\u3000]]";
324    /**
325     * Valid characters for IRI label defined in RFC 3987.
326     */
327    private static final String LABEL_CHAR = "a-zA-Z0-9" + UCS_CHAR;
328    /**
329     * Valid characters for IRI TLD defined in RFC 3987.
330     */
331    private static final String TLD_CHAR = "a-zA-Z" + UCS_CHAR;
332    /**
333     * RFC 1035 Section 2.3.4 limits the labels to a maximum 63 octets.
334     */
335    private static final String IRI_LABEL =
336            "[" + LABEL_CHAR + "](?:[" + LABEL_CHAR + "\\-]{0,61}[" + LABEL_CHAR + "]){0,1}";
337    /**
338     * RFC 3492 references RFC 1034 and limits Punycode algorithm output to 63 characters.
339     */
340    private static final String PUNYCODE_TLD = "xn\\-\\-[\\w\\-]{0,58}\\w";
341    private static final String TLD = "(" + PUNYCODE_TLD + "|" + "[" + TLD_CHAR + "]{2,63}" +")";
342    private static final String HOST_NAME = "(" + IRI_LABEL + "\\.)+" + TLD;
343    public static final Pattern DOMAIN_NAME
344            = Pattern.compile("(" + HOST_NAME + "|" + IP6_ADDRESS + "|" + IP_ADDRESS +")");
345    private static final String PROTOCOL = "(?i:http|https|rtsp):\\/\\/";
346    /* A word boundary or end of input.  This is to stop foo.sure from matching as foo.su */
347    private static final String WORD_BOUNDARY = "(?:\\b|$|^)";
348    private static final String USER_INFO = "(?:[a-zA-Z0-9\\$\\-\\_\\.\\+\\!\\*\\'\\(\\)"
349            + "\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,64}(?:\\:(?:[a-zA-Z0-9\\$\\-\\_"
350            + "\\.\\+\\!\\*\\'\\(\\)\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,25})?\\@";
351    private static final String PORT_NUMBER = "\\:\\d{1,5}";
352    private static final String PATH_CHAR = "(?:(?:[" + LABEL_CHAR
353            + "\\;\\/\\?\\:\\@\\&\\=\\#\\~"
354            + "\\-\\.\\+\\!\\*\\'\\(\\)\\,\\_\\$])|(?:\\%[a-fA-F0-9]{2}))";
355    private static final String PATH_AND_QUERY = "\\/" + PATH_CHAR + "*";
356    /**
357     *  Regular expression pattern to match most part of RFC 3987
358     *  Internationalized URLs, aka IRIs.
359     */
360    public static final Pattern WEB_URL = Pattern.compile("("
361            + "("
362            + "(?:" + PROTOCOL + "(?:" + USER_INFO + ")?" + ")?"
363            + "(?:" + DOMAIN_NAME + ")"
364            + "(?:" + PORT_NUMBER + ")?"
365            + ")"
366            + "(" + PATH_AND_QUERY + ")?"
367            + WORD_BOUNDARY
368            + ")");
369    /**
370     * Regular expression that matches known TLDs and punycode TLDs
371     */
372    private static final String STRICT_TLD = "(?:" +
373            IANA_TOP_LEVEL_DOMAINS + "|" + PUNYCODE_TLD + ")";
374    /**
375     * Regular expression that matches host names using {@link #STRICT_TLD}
376     */
377    private static final String STRICT_HOST_NAME = "(?:(?:" + IRI_LABEL + "\\.)+"
378            + STRICT_TLD + ")";
379    /**
380     * Regular expression that matches domain names using either {@link #STRICT_HOST_NAME} or
381     * {@link #IP_ADDRESS}
382     */
383    private static final Pattern STRICT_DOMAIN_NAME
384            = Pattern.compile("(?:" + STRICT_HOST_NAME + "|" + IP_ADDRESS + "|" + IP6_ADDRESS + ")");
385    /**
386     * Regular expression that matches domain names without a TLD
387     */
388    private static final String RELAXED_DOMAIN_NAME =
389            "(?:" + "(?:" + IRI_LABEL + "(?:\\.(?=\\S))" +"?)+" + "|" + IP_ADDRESS + "|" + IP6_ADDRESS + ")";
390    /**
391     * Regular expression to match strings that do not start with a supported protocol. The TLDs
392     * are expected to be one of the known TLDs.
393     */
394    private static final String WEB_URL_WITHOUT_PROTOCOL = "("
395            + WORD_BOUNDARY
396            + "(?<!:\\/\\/)"
397            + "("
398            + "(?:" + STRICT_DOMAIN_NAME + ")"
399            + "(?:" + PORT_NUMBER + ")?"
400            + ")"
401            + "(?:" + PATH_AND_QUERY + ")?"
402            + ")";
403    /**
404     * Regular expression to match strings that start with a supported protocol. Rules for domain
405     * names and TLDs are more relaxed. TLDs are optional.
406     */
407    private static final String WEB_URL_WITH_PROTOCOL = "("
408            + WORD_BOUNDARY
409            + "(?:"
410            + "(?:" + PROTOCOL + "(?:" + USER_INFO + ")?" + ")"
411            + "(?:" + RELAXED_DOMAIN_NAME + ")?"
412            + "(?:" + PORT_NUMBER + ")?"
413            + ")"
414            + "(?:" + PATH_AND_QUERY + ")?"
415            + ")";
416    /**
417     * Regular expression pattern to match IRIs. If a string starts with http(s):// the expression
418     * tries to match the URL structure with a relaxed rule for TLDs. If the string does not start
419     * with http(s):// the TLDs are expected to be one of the known TLDs.
420     *
421     * @hide
422     */
423    public static final Pattern AUTOLINK_WEB_URL = Pattern.compile(
424            "(" + WEB_URL_WITH_PROTOCOL + "|" + WEB_URL_WITHOUT_PROTOCOL + ")");
425    /**
426     * Regular expression for valid email characters. Does not include some of the valid characters
427     * defined in RFC5321: #&~!^`{}/=$*?|
428     */
429    private static final String EMAIL_CHAR = LABEL_CHAR + "\\+\\-_%'";
430    /**
431     * Regular expression for local part of an email address. RFC5321 section 4.5.3.1.1 limits
432     * the local part to be at most 64 octets.
433     */
434    private static final String EMAIL_ADDRESS_LOCAL_PART =
435            "[" + EMAIL_CHAR + "]" + "(?:[" + EMAIL_CHAR + "\\.]{1,62}[" + EMAIL_CHAR + "])?";
436    /**
437     * Regular expression for the domain part of an email address. RFC5321 section 4.5.3.1.2 limits
438     * the domain to be at most 255 octets.
439     */
440    private static final String EMAIL_ADDRESS_DOMAIN =
441            "(?=.{1,255}(?:\\s|$|^))" + HOST_NAME;
442    /**
443     * Regular expression pattern to match email addresses. It excludes double quoted local parts
444     * and the special characters #&~!^`{}/=$*?| that are included in RFC5321.
445     * @hide
446     */
447    public static final Pattern AUTOLINK_EMAIL_ADDRESS = Pattern.compile("(" + WORD_BOUNDARY +
448            "(?:" + EMAIL_ADDRESS_LOCAL_PART + "@" + EMAIL_ADDRESS_DOMAIN + ")" +
449            WORD_BOUNDARY + ")"
450    );
451    public static final Pattern EMAIL_ADDRESS
452            = Pattern.compile(
453            "[a-zA-Z0-9\\+\\.\\_\\%\\-\\+]{1,256}" +
454                    "\\@" +
455                    "[a-zA-Z0-9][a-zA-Z0-9\\-]{0,64}" +
456                    "(" +
457                    "\\." +
458                    "[a-zA-Z0-9][a-zA-Z0-9\\-]{0,25}" +
459                    ")+"
460    );
461    /**
462     * This pattern is intended for searching for things that look like they
463     * might be phone numbers in arbitrary text, not for validating whether
464     * something is in fact a phone number.  It will miss many things that
465     * are legitimate phone numbers.
466     *
467     * <p> The pattern matches the following:
468     * <ul>
469     * <li>Optionally, a + sign followed immediately by one or more digits. Spaces, dots, or dashes
470     * may follow.
471     * <li>Optionally, sets of digits in parentheses, separated by spaces, dots, or dashes.
472     * <li>A string starting and ending with a digit, containing digits, spaces, dots, and/or dashes.
473     * </ul>
474     */
475    public static final Pattern PHONE
476            = Pattern.compile(                      // sdd = space, dot, or dash
477            "(\\+[0-9]+[\\- \\.]*)?"        // +<digits><sdd>*
478                    + "(\\([0-9]+\\)[\\- \\.]*)?"   // (<digits>)<sdd>*
479                    + "([0-9][0-9\\- \\.]+[0-9])"); // <digit><digit|sdd>+<digit>
480
481    public static final Pattern TEL_URI =
482        Pattern.compile("tel:(?:(?:\\+\\d+)|(?:\\d+;phone-context=" + PATH_CHAR + "+))");
483
484    public static final Pattern SMS_URI =
485        Pattern.compile("sms:(?:(?:\\+\\d+)|(?:\\d+;phone-context=" + PATH_CHAR + "+))");
486
487    /**
488     *  Convenience method to take all of the non-null matching groups in a
489     *  regex Matcher and return them as a concatenated string.
490     *
491     *  @param matcher      The Matcher object from which grouped text will
492     *                      be extracted
493     *
494     *  @return             A String comprising all of the non-null matched
495     *                      groups concatenated together
496     */
497    public static final String concatGroups(Matcher matcher) {
498        StringBuilder b = new StringBuilder();
499        final int numGroups = matcher.groupCount();
500        for (int i = 1; i <= numGroups; i++) {
501            String s = matcher.group(i);
502            if (s != null) {
503                b.append(s);
504            }
505        }
506        return b.toString();
507    }
508    /**
509     * Convenience method to return only the digits and plus signs
510     * in the matching string.
511     *
512     * @param matcher      The Matcher object from which digits and plus will
513     *                     be extracted
514     *
515     * @return             A String comprising all of the digits and plus in
516     *                     the match
517     */
518    public static final String digitsAndPlusOnly(Matcher matcher) {
519        StringBuilder buffer = new StringBuilder();
520        String matchingRegion = matcher.group();
521        for (int i = 0, size = matchingRegion.length(); i < size; i++) {
522            char character = matchingRegion.charAt(i);
523            if (character == '+' || Character.isDigit(character)) {
524                buffer.append(character);
525            }
526        }
527        return buffer.toString();
528    }
529    /**
530     * Do not create this static utility class.
531     */
532    private Patterns() {}
533}