1/*
2 * Copyright (c) 2018-2019, Daniel Gultsch All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without modification,
5 * are permitted provided that the following conditions are met:
6 *
7 * 1. Redistributions of source code must retain the above copyright notice, this
8 * list of conditions and the following disclaimer.
9 *
10 * 2. Redistributions in binary form must reproduce the above copyright notice,
11 * this list of conditions and the following disclaimer in the documentation and/or
12 * other materials provided with the distribution.
13 *
14 * 3. Neither the name of the copyright holder nor the names of its contributors
15 * may be used to endorse or promote products derived from this software without
16 * specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
22 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30package eu.siacs.conversations.utils;
31
import android.annotation.TargetApi;
import android.content.Context;
import android.os.Build;
import android.text.Spannable;
import android.text.SpannableString;
import android.text.SpannableStringBuilder;
import android.text.style.ForegroundColorSpan;
import android.util.LruCache;

import androidx.annotation.ColorInt;

import com.google.android.material.color.MaterialColors;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import eu.siacs.conversations.R;
import eu.siacs.conversations.xmpp.Jid;
59
60public class IrregularUnicodeDetector {
61
62 private static final Map<Character.UnicodeBlock, Character.UnicodeBlock> NORMALIZATION_MAP;
63 private static final LruCache<Jid, PatternTuple> CACHE = new LruCache<>(4096);
64 private static final List<String> AMBIGUOUS_CYRILLIC = Arrays.asList("а","г","е","ѕ","і","ј","ķ","ԛ","о","р","с","у","х");
65
66 static {
67 Map<Character.UnicodeBlock, Character.UnicodeBlock> temp = new HashMap<>();
68 temp.put(Character.UnicodeBlock.LATIN_1_SUPPLEMENT, Character.UnicodeBlock.BASIC_LATIN);
69 NORMALIZATION_MAP = Collections.unmodifiableMap(temp);
70 }
71
72 private static Character.UnicodeBlock normalize(Character.UnicodeBlock in) {
73 if (NORMALIZATION_MAP.containsKey(in)) {
74 return NORMALIZATION_MAP.get(in);
75 } else {
76 return in;
77 }
78 }
79
80 public static Spannable style(final Context context, Jid jid) {
81 return style(jid, MaterialColors.getColor(context, com.google.android.material.R.attr.colorError,"colorError not found"));
82 }
83
84 private static Spannable style(Jid jid, @ColorInt int color) {
85 PatternTuple patternTuple = find(jid);
86 SpannableStringBuilder builder = new SpannableStringBuilder();
87 if (jid.getEscapedLocal() != null && patternTuple.local != null) {
88 SpannableString local = new SpannableString(jid.getEscapedLocal());
89 colorize(local, patternTuple.local, color);
90 builder.append(local);
91 builder.append('@');
92 }
93 if (jid.getDomain() != null) {
94 String[] labels = jid.getDomain().toEscapedString().split("\\.");
95 for (int i = 0; i < labels.length; ++i) {
96 SpannableString spannableString = new SpannableString(labels[i]);
97 colorize(spannableString, patternTuple.domain.get(i), color);
98 if (i != 0) {
99 builder.append('.');
100 }
101 builder.append(spannableString);
102 }
103 }
104 if (builder.length() != 0 && jid.getResource() != null) {
105 builder.append('/');
106 builder.append(jid.getResource());
107 }
108 return builder;
109 }
110
111 private static void colorize(SpannableString spannableString, Pattern pattern, @ColorInt int color) {
112 Matcher matcher = pattern.matcher(spannableString);
113 while (matcher.find()) {
114 if (matcher.start() < matcher.end()) {
115 spannableString.setSpan(new ForegroundColorSpan(color), matcher.start(), matcher.end(), Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
116 }
117 }
118 }
119
120 private static Map<Character.UnicodeBlock, List<String>> mapCompat(String word) {
121 Map<Character.UnicodeBlock, List<String>> map = new HashMap<>();
122 final int length = word.length();
123 for (int offset = 0; offset < length; ) {
124 final int codePoint = word.codePointAt(offset);
125 offset += Character.charCount(codePoint);
126 if (!Character.isLetter(codePoint)) {
127 continue;
128 }
129 Character.UnicodeBlock block = normalize(Character.UnicodeBlock.of(codePoint));
130 List<String> codePoints;
131 if (map.containsKey(block)) {
132 codePoints = map.get(block);
133 } else {
134 codePoints = new ArrayList<>();
135 map.put(block, codePoints);
136 }
137 codePoints.add(String.copyValueOf(Character.toChars(codePoint)));
138 }
139 return map;
140 }
141
142 @TargetApi(Build.VERSION_CODES.N)
143 private static Map<Character.UnicodeScript, List<String>> map(String word) {
144 Map<Character.UnicodeScript, List<String>> map = new HashMap<>();
145 final int length = word.length();
146 for (int offset = 0; offset < length; ) {
147 final int codePoint = word.codePointAt(offset);
148 Character.UnicodeScript script = Character.UnicodeScript.of(codePoint);
149 if (script != Character.UnicodeScript.COMMON) {
150 List<String> codePoints;
151 if (map.containsKey(script)) {
152 codePoints = map.get(script);
153 } else {
154 codePoints = new ArrayList<>();
155 map.put(script, codePoints);
156 }
157 codePoints.add(String.copyValueOf(Character.toChars(codePoint)));
158 }
159 offset += Character.charCount(codePoint);
160 }
161 return map;
162 }
163
164 private static Set<String> eliminateFirstAndGetCodePointsCompat(Map<Character.UnicodeBlock, List<String>> map) {
165 return eliminateFirstAndGetCodePoints(map, Character.UnicodeBlock.BASIC_LATIN);
166 }
167
168 @TargetApi(Build.VERSION_CODES.N)
169 private static Set<String> eliminateFirstAndGetCodePoints(Map<Character.UnicodeScript, List<String>> map) {
170 return eliminateFirstAndGetCodePoints(map, Character.UnicodeScript.COMMON);
171 }
172
173 private static <T> Set<String> eliminateFirstAndGetCodePoints(Map<T, List<String>> map, T defaultPick) {
174 T pick = defaultPick;
175 int size = 0;
176 for (Map.Entry<T, List<String>> entry : map.entrySet()) {
177 if (entry.getValue().size() > size) {
178 size = entry.getValue().size();
179 pick = entry.getKey();
180 }
181 }
182 map.remove(pick);
183 Set<String> all = new HashSet<>();
184 for (List<String> codePoints : map.values()) {
185 all.addAll(codePoints);
186 }
187 return all;
188 }
189
190 private static Set<String> findIrregularCodePoints(String word) {
191 Set<String> codePoints;
192 if (Build.VERSION.SDK_INT < Build.VERSION_CODES.N) {
193 final Map<Character.UnicodeBlock, List<String>> map = mapCompat(word);
194 final Set<String> set = asSet(map);
195 if (containsOnlyAmbiguousCyrillic(set)) {
196 return set;
197 }
198 codePoints = eliminateFirstAndGetCodePointsCompat(map);
199 } else {
200 final Map<Character.UnicodeScript, List<String>> map = map(word);
201 final Set<String> set = asSet(map);
202 if (containsOnlyAmbiguousCyrillic(set)) {
203 return set;
204 }
205 codePoints = eliminateFirstAndGetCodePoints(map);
206 }
207 return codePoints;
208 }
209
210 private static Set<String> asSet(Map<?, List<String>> map) {
211 final Set<String> flat = new HashSet<>();
212 for(List<String> value : map.values()) {
213 flat.addAll(value);
214 }
215 return flat;
216 }
217
218
219 private static boolean containsOnlyAmbiguousCyrillic(Collection<String> codePoints) {
220 for (String codePoint : codePoints) {
221 if (!AMBIGUOUS_CYRILLIC.contains(codePoint)) {
222 return false;
223 }
224 }
225 return true;
226 }
227
228 private static PatternTuple find(Jid jid) {
229 synchronized (CACHE) {
230 PatternTuple pattern = CACHE.get(jid);
231 if (pattern != null) {
232 return pattern;
233 }
234 pattern = PatternTuple.of(jid);
235 CACHE.put(jid, pattern);
236 return pattern;
237 }
238 }
239
240 private static Pattern create(Set<String> codePoints) {
241 final StringBuilder pattern = new StringBuilder();
242 for (String codePoint : codePoints) {
243 if (pattern.length() != 0) {
244 pattern.append('|');
245 }
246 pattern.append(Pattern.quote(codePoint));
247 }
248 return Pattern.compile(pattern.toString());
249 }
250
251 private static class PatternTuple {
252 private final Pattern local;
253 private final List<Pattern> domain;
254
255 private PatternTuple(Pattern local, List<Pattern> domain) {
256 this.local = local;
257 this.domain = domain;
258 }
259
260 private static PatternTuple of(Jid jid) {
261 final Pattern localPattern;
262 if (jid.getEscapedLocal() != null) {
263 localPattern = create(findIrregularCodePoints(jid.getEscapedLocal()));
264 } else {
265 localPattern = null;
266 }
267 String domain = jid.getDomain().toEscapedString();
268 final List<Pattern> domainPatterns = new ArrayList<>();
269 if (domain != null) {
270 for (String label : domain.split("\\.")) {
271 domainPatterns.add(create(findIrregularCodePoints(label)));
272 }
273 }
274 return new PatternTuple(localPattern, domainPatterns);
275 }
276 }
277}