1/*
2 * Copyright (c) 2018-2019, Daniel Gultsch All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without modification,
5 * are permitted provided that the following conditions are met:
6 *
7 * 1. Redistributions of source code must retain the above copyright notice, this
8 * list of conditions and the following disclaimer.
9 *
10 * 2. Redistributions in binary form must reproduce the above copyright notice,
11 * this list of conditions and the following disclaimer in the documentation and/or
12 * other materials provided with the distribution.
13 *
14 * 3. Neither the name of the copyright holder nor the names of its contributors
15 * may be used to endorse or promote products derived from this software without
16 * specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
22 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30package eu.siacs.conversations.utils;
31
import android.annotation.TargetApi;
import android.content.Context;
import android.os.Build;
import android.text.Spannable;
import android.text.SpannableString;
import android.text.SpannableStringBuilder;
import android.text.style.ForegroundColorSpan;
import android.util.LruCache;
import androidx.annotation.ColorInt;
import com.google.android.material.color.MaterialColors;
import eu.siacs.conversations.xmpp.Jid;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
54
55public class IrregularUnicodeDetector {
56
57 private static final Map<Character.UnicodeBlock, Character.UnicodeBlock> NORMALIZATION_MAP;
58 private static final LruCache<Jid, PatternTuple> CACHE = new LruCache<>(4096);
59 private static final List<String> AMBIGUOUS_CYRILLIC =
60 Arrays.asList("а", "г", "е", "ѕ", "і", "ј", "ķ", "ԛ", "о", "р", "с", "у", "х");
61
62 static {
63 Map<Character.UnicodeBlock, Character.UnicodeBlock> temp = new HashMap<>();
64 temp.put(Character.UnicodeBlock.LATIN_1_SUPPLEMENT, Character.UnicodeBlock.BASIC_LATIN);
65 NORMALIZATION_MAP = Collections.unmodifiableMap(temp);
66 }
67
68 private static Character.UnicodeBlock normalize(Character.UnicodeBlock in) {
69 if (NORMALIZATION_MAP.containsKey(in)) {
70 return NORMALIZATION_MAP.get(in);
71 } else {
72 return in;
73 }
74 }
75
76 public static Spannable style(final Context context, Jid jid) {
77 return style(
78 jid,
79 MaterialColors.getColor(
80 context, androidx.appcompat.R.attr.colorError, "colorError not found"));
81 }
82
83 private static Spannable style(final Jid jid, final @ColorInt int color) {
84 final var patternTuple = find(jid);
85 final var builder = new SpannableStringBuilder();
86 if (jid.getLocal() != null && patternTuple.local != null) {
87 SpannableString local = new SpannableString(jid.getLocal());
88 colorize(local, patternTuple.local, color);
89 builder.append(local);
90 builder.append('@');
91 }
92 if (jid.getDomain() != null) {
93 final var labels = jid.getDomain().toString().split("\\.");
94 for (int i = 0; i < labels.length; ++i) {
95 SpannableString spannableString = new SpannableString(labels[i]);
96 colorize(spannableString, patternTuple.domain.get(i), color);
97 if (i != 0) {
98 builder.append('.');
99 }
100 builder.append(spannableString);
101 }
102 }
103 if (builder.length() != 0 && jid.getResource() != null) {
104 builder.append('/');
105 builder.append(jid.getResource());
106 }
107 return builder;
108 }
109
110 private static void colorize(
111 SpannableString spannableString, Pattern pattern, @ColorInt int color) {
112 Matcher matcher = pattern.matcher(spannableString);
113 while (matcher.find()) {
114 if (matcher.start() < matcher.end()) {
115 spannableString.setSpan(
116 new ForegroundColorSpan(color),
117 matcher.start(),
118 matcher.end(),
119 Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
120 }
121 }
122 }
123
124 private static Map<Character.UnicodeBlock, List<String>> mapCompat(String word) {
125 Map<Character.UnicodeBlock, List<String>> map = new HashMap<>();
126 final int length = word.length();
127 for (int offset = 0; offset < length; ) {
128 final int codePoint = word.codePointAt(offset);
129 offset += Character.charCount(codePoint);
130 if (!Character.isLetter(codePoint)) {
131 continue;
132 }
133 Character.UnicodeBlock block = normalize(Character.UnicodeBlock.of(codePoint));
134 List<String> codePoints;
135 if (map.containsKey(block)) {
136 codePoints = map.get(block);
137 } else {
138 codePoints = new ArrayList<>();
139 map.put(block, codePoints);
140 }
141 codePoints.add(String.copyValueOf(Character.toChars(codePoint)));
142 }
143 return map;
144 }
145
146 @TargetApi(Build.VERSION_CODES.N)
147 private static Map<Character.UnicodeScript, List<String>> map(String word) {
148 Map<Character.UnicodeScript, List<String>> map = new HashMap<>();
149 final int length = word.length();
150 for (int offset = 0; offset < length; ) {
151 final int codePoint = word.codePointAt(offset);
152 Character.UnicodeScript script = Character.UnicodeScript.of(codePoint);
153 if (script != Character.UnicodeScript.COMMON) {
154 List<String> codePoints;
155 if (map.containsKey(script)) {
156 codePoints = map.get(script);
157 } else {
158 codePoints = new ArrayList<>();
159 map.put(script, codePoints);
160 }
161 codePoints.add(String.copyValueOf(Character.toChars(codePoint)));
162 }
163 offset += Character.charCount(codePoint);
164 }
165 return map;
166 }
167
168 private static Set<String> eliminateFirstAndGetCodePointsCompat(
169 Map<Character.UnicodeBlock, List<String>> map) {
170 return eliminateFirstAndGetCodePoints(map, Character.UnicodeBlock.BASIC_LATIN);
171 }
172
173 @TargetApi(Build.VERSION_CODES.N)
174 private static Set<String> eliminateFirstAndGetCodePoints(
175 Map<Character.UnicodeScript, List<String>> map) {
176 return eliminateFirstAndGetCodePoints(map, Character.UnicodeScript.COMMON);
177 }
178
179 private static <T> Set<String> eliminateFirstAndGetCodePoints(
180 Map<T, List<String>> map, T defaultPick) {
181 T pick = defaultPick;
182 int size = 0;
183 for (Map.Entry<T, List<String>> entry : map.entrySet()) {
184 if (entry.getValue().size() > size) {
185 size = entry.getValue().size();
186 pick = entry.getKey();
187 }
188 }
189 map.remove(pick);
190 Set<String> all = new HashSet<>();
191 for (List<String> codePoints : map.values()) {
192 all.addAll(codePoints);
193 }
194 return all;
195 }
196
197 private static Set<String> findIrregularCodePoints(String word) {
198 Set<String> codePoints;
199 if (Build.VERSION.SDK_INT < Build.VERSION_CODES.N) {
200 final Map<Character.UnicodeBlock, List<String>> map = mapCompat(word);
201 final Set<String> set = asSet(map);
202 if (containsOnlyAmbiguousCyrillic(set)) {
203 return set;
204 }
205 codePoints = eliminateFirstAndGetCodePointsCompat(map);
206 } else {
207 final Map<Character.UnicodeScript, List<String>> map = map(word);
208 final Set<String> set = asSet(map);
209 if (containsOnlyAmbiguousCyrillic(set)) {
210 return set;
211 }
212 codePoints = eliminateFirstAndGetCodePoints(map);
213 }
214 return codePoints;
215 }
216
217 private static Set<String> asSet(Map<?, List<String>> map) {
218 final Set<String> flat = new HashSet<>();
219 for (List<String> value : map.values()) {
220 flat.addAll(value);
221 }
222 return flat;
223 }
224
225 private static boolean containsOnlyAmbiguousCyrillic(Collection<String> codePoints) {
226 for (String codePoint : codePoints) {
227 if (!AMBIGUOUS_CYRILLIC.contains(codePoint)) {
228 return false;
229 }
230 }
231 return true;
232 }
233
234 private static PatternTuple find(Jid jid) {
235 synchronized (CACHE) {
236 PatternTuple pattern = CACHE.get(jid);
237 if (pattern != null) {
238 return pattern;
239 }
240 pattern = PatternTuple.of(jid);
241 CACHE.put(jid, pattern);
242 return pattern;
243 }
244 }
245
246 private static Pattern create(Set<String> codePoints) {
247 final StringBuilder pattern = new StringBuilder();
248 for (String codePoint : codePoints) {
249 if (pattern.length() != 0) {
250 pattern.append('|');
251 }
252 pattern.append(Pattern.quote(codePoint));
253 }
254 return Pattern.compile(pattern.toString());
255 }
256
257 private static class PatternTuple {
258 private final Pattern local;
259 private final List<Pattern> domain;
260
261 private PatternTuple(Pattern local, List<Pattern> domain) {
262 this.local = local;
263 this.domain = domain;
264 }
265
266 private static PatternTuple of(Jid jid) {
267 final Pattern localPattern;
268 if (jid.getLocal() != null) {
269 localPattern = create(findIrregularCodePoints(jid.getLocal()));
270 } else {
271 localPattern = null;
272 }
273 String domain = jid.getDomain().toString();
274 final List<Pattern> domainPatterns = new ArrayList<>();
275 if (domain != null) {
276 for (String label : domain.split("\\.")) {
277 domainPatterns.add(create(findIrregularCodePoints(label)));
278 }
279 }
280 return new PatternTuple(localPattern, domainPatterns);
281 }
282 }
283}