1/*
2 * Copyright (c) 2018-2019, Daniel Gultsch All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without modification,
5 * are permitted provided that the following conditions are met:
6 *
7 * 1. Redistributions of source code must retain the above copyright notice, this
8 * list of conditions and the following disclaimer.
9 *
10 * 2. Redistributions in binary form must reproduce the above copyright notice,
11 * this list of conditions and the following disclaimer in the documentation and/or
12 * other materials provided with the distribution.
13 *
14 * 3. Neither the name of the copyright holder nor the names of its contributors
15 * may be used to endorse or promote products derived from this software without
16 * specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
22 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30package eu.siacs.conversations.utils;
31
32import android.annotation.TargetApi;
33import android.content.Context;
34import android.os.Build;
35import android.text.Spannable;
36import android.text.SpannableString;
37import android.text.SpannableStringBuilder;
38import android.text.style.ForegroundColorSpan;
39import android.util.LruCache;
40import androidx.annotation.ColorInt;
41import com.google.android.material.color.MaterialColors;
42import eu.siacs.conversations.xmpp.Jid;
43import java.util.ArrayList;
44import java.util.Arrays;
45import java.util.Collection;
46import java.util.Collections;
47import java.util.HashMap;
48import java.util.HashSet;
49import java.util.List;
50import java.util.Map;
51import java.util.Set;
52import java.util.regex.Matcher;
53import java.util.regex.Pattern;
54
55public class IrregularUnicodeDetector {
56
57 private static final Map<Character.UnicodeBlock, Character.UnicodeBlock> NORMALIZATION_MAP;
58 private static final LruCache<Jid, PatternTuple> CACHE = new LruCache<>(4096);
59 private static final List<String> AMBIGUOUS_CYRILLIC =
60 Arrays.asList("а", "г", "е", "ѕ", "і", "ј", "ķ", "ԛ", "о", "р", "с", "у", "х");
61
62 static {
63 Map<Character.UnicodeBlock, Character.UnicodeBlock> temp = new HashMap<>();
64 temp.put(Character.UnicodeBlock.LATIN_1_SUPPLEMENT, Character.UnicodeBlock.BASIC_LATIN);
65 NORMALIZATION_MAP = Collections.unmodifiableMap(temp);
66 }
67
68 private static Character.UnicodeBlock normalize(Character.UnicodeBlock in) {
69 if (NORMALIZATION_MAP.containsKey(in)) {
70 return NORMALIZATION_MAP.get(in);
71 } else {
72 return in;
73 }
74 }
75
76 public static Spannable style(final Context context, Jid jid) {
77 return style(
78 jid,
79 MaterialColors.getColor(
80 context,
81 com.google.android.material.R.attr.colorError,
82 "colorError not found"));
83 }
84
85 private static Spannable style(Jid jid, @ColorInt int color) {
86 PatternTuple patternTuple = find(jid);
87 SpannableStringBuilder builder = new SpannableStringBuilder();
88 if (jid.getLocal() != null && patternTuple.local != null) {
89 SpannableString local = new SpannableString(jid.getLocal());
90 colorize(local, patternTuple.local, color);
91 builder.append(local);
92 builder.append('@');
93 }
94 if (jid.getDomain() != null) {
95 String[] labels = jid.getDomain().toString().split("\\.");
96 for (int i = 0; i < labels.length; ++i) {
97 SpannableString spannableString = new SpannableString(labels[i]);
98 colorize(spannableString, patternTuple.domain.get(i), color);
99 if (i != 0) {
100 builder.append('.');
101 }
102 builder.append(spannableString);
103 }
104 }
105 if (builder.length() != 0 && jid.getResource() != null) {
106 builder.append('/');
107 builder.append(jid.getResource());
108 }
109 return builder;
110 }
111
112 private static void colorize(
113 SpannableString spannableString, Pattern pattern, @ColorInt int color) {
114 Matcher matcher = pattern.matcher(spannableString);
115 while (matcher.find()) {
116 if (matcher.start() < matcher.end()) {
117 spannableString.setSpan(
118 new ForegroundColorSpan(color),
119 matcher.start(),
120 matcher.end(),
121 Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
122 }
123 }
124 }
125
126 private static Map<Character.UnicodeBlock, List<String>> mapCompat(String word) {
127 Map<Character.UnicodeBlock, List<String>> map = new HashMap<>();
128 final int length = word.length();
129 for (int offset = 0; offset < length; ) {
130 final int codePoint = word.codePointAt(offset);
131 offset += Character.charCount(codePoint);
132 if (!Character.isLetter(codePoint)) {
133 continue;
134 }
135 Character.UnicodeBlock block = normalize(Character.UnicodeBlock.of(codePoint));
136 List<String> codePoints;
137 if (map.containsKey(block)) {
138 codePoints = map.get(block);
139 } else {
140 codePoints = new ArrayList<>();
141 map.put(block, codePoints);
142 }
143 codePoints.add(String.copyValueOf(Character.toChars(codePoint)));
144 }
145 return map;
146 }
147
148 @TargetApi(Build.VERSION_CODES.N)
149 private static Map<Character.UnicodeScript, List<String>> map(String word) {
150 Map<Character.UnicodeScript, List<String>> map = new HashMap<>();
151 final int length = word.length();
152 for (int offset = 0; offset < length; ) {
153 final int codePoint = word.codePointAt(offset);
154 Character.UnicodeScript script = Character.UnicodeScript.of(codePoint);
155 if (script != Character.UnicodeScript.COMMON) {
156 List<String> codePoints;
157 if (map.containsKey(script)) {
158 codePoints = map.get(script);
159 } else {
160 codePoints = new ArrayList<>();
161 map.put(script, codePoints);
162 }
163 codePoints.add(String.copyValueOf(Character.toChars(codePoint)));
164 }
165 offset += Character.charCount(codePoint);
166 }
167 return map;
168 }
169
170 private static Set<String> eliminateFirstAndGetCodePointsCompat(
171 Map<Character.UnicodeBlock, List<String>> map) {
172 return eliminateFirstAndGetCodePoints(map, Character.UnicodeBlock.BASIC_LATIN);
173 }
174
175 @TargetApi(Build.VERSION_CODES.N)
176 private static Set<String> eliminateFirstAndGetCodePoints(
177 Map<Character.UnicodeScript, List<String>> map) {
178 return eliminateFirstAndGetCodePoints(map, Character.UnicodeScript.COMMON);
179 }
180
181 private static <T> Set<String> eliminateFirstAndGetCodePoints(
182 Map<T, List<String>> map, T defaultPick) {
183 T pick = defaultPick;
184 int size = 0;
185 for (Map.Entry<T, List<String>> entry : map.entrySet()) {
186 if (entry.getValue().size() > size) {
187 size = entry.getValue().size();
188 pick = entry.getKey();
189 }
190 }
191 map.remove(pick);
192 Set<String> all = new HashSet<>();
193 for (List<String> codePoints : map.values()) {
194 all.addAll(codePoints);
195 }
196 return all;
197 }
198
199 private static Set<String> findIrregularCodePoints(String word) {
200 Set<String> codePoints;
201 if (Build.VERSION.SDK_INT < Build.VERSION_CODES.N) {
202 final Map<Character.UnicodeBlock, List<String>> map = mapCompat(word);
203 final Set<String> set = asSet(map);
204 if (containsOnlyAmbiguousCyrillic(set)) {
205 return set;
206 }
207 codePoints = eliminateFirstAndGetCodePointsCompat(map);
208 } else {
209 final Map<Character.UnicodeScript, List<String>> map = map(word);
210 final Set<String> set = asSet(map);
211 if (containsOnlyAmbiguousCyrillic(set)) {
212 return set;
213 }
214 codePoints = eliminateFirstAndGetCodePoints(map);
215 }
216 return codePoints;
217 }
218
219 private static Set<String> asSet(Map<?, List<String>> map) {
220 final Set<String> flat = new HashSet<>();
221 for (List<String> value : map.values()) {
222 flat.addAll(value);
223 }
224 return flat;
225 }
226
227 private static boolean containsOnlyAmbiguousCyrillic(Collection<String> codePoints) {
228 for (String codePoint : codePoints) {
229 if (!AMBIGUOUS_CYRILLIC.contains(codePoint)) {
230 return false;
231 }
232 }
233 return true;
234 }
235
236 private static PatternTuple find(Jid jid) {
237 synchronized (CACHE) {
238 PatternTuple pattern = CACHE.get(jid);
239 if (pattern != null) {
240 return pattern;
241 }
242 pattern = PatternTuple.of(jid);
243 CACHE.put(jid, pattern);
244 return pattern;
245 }
246 }
247
248 private static Pattern create(Set<String> codePoints) {
249 final StringBuilder pattern = new StringBuilder();
250 for (String codePoint : codePoints) {
251 if (pattern.length() != 0) {
252 pattern.append('|');
253 }
254 pattern.append(Pattern.quote(codePoint));
255 }
256 return Pattern.compile(pattern.toString());
257 }
258
259 private static class PatternTuple {
260 private final Pattern local;
261 private final List<Pattern> domain;
262
263 private PatternTuple(Pattern local, List<Pattern> domain) {
264 this.local = local;
265 this.domain = domain;
266 }
267
268 private static PatternTuple of(Jid jid) {
269 final Pattern localPattern;
270 if (jid.getLocal() != null) {
271 localPattern = create(findIrregularCodePoints(jid.getLocal()));
272 } else {
273 localPattern = null;
274 }
275 String domain = jid.getDomain().toString();
276 final List<Pattern> domainPatterns = new ArrayList<>();
277 if (domain != null) {
278 for (String label : domain.split("\\.")) {
279 domainPatterns.add(create(findIrregularCodePoints(label)));
280 }
281 }
282 return new PatternTuple(localPattern, domainPatterns);
283 }
284 }
285}