1/*
2 * Copyright (c) 2018-2019, Daniel Gultsch All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without modification,
5 * are permitted provided that the following conditions are met:
6 *
7 * 1. Redistributions of source code must retain the above copyright notice, this
8 * list of conditions and the following disclaimer.
9 *
10 * 2. Redistributions in binary form must reproduce the above copyright notice,
11 * this list of conditions and the following disclaimer in the documentation and/or
12 * other materials provided with the distribution.
13 *
14 * 3. Neither the name of the copyright holder nor the names of its contributors
15 * may be used to endorse or promote products derived from this software without
16 * specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
22 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30package eu.siacs.conversations.utils;
31
32import android.annotation.TargetApi;
33import android.content.Context;
34import android.os.Build;
35import android.support.annotation.ColorInt;
36import android.text.Spannable;
37import android.text.SpannableString;
38import android.text.SpannableStringBuilder;
39import android.text.style.ForegroundColorSpan;
40import android.util.LruCache;
41
42import java.util.ArrayList;
43import java.util.Arrays;
44import java.util.Collection;
45import java.util.Collections;
46import java.util.HashMap;
47import java.util.HashSet;
48import java.util.List;
49import java.util.Map;
50import java.util.Set;
51import java.util.regex.Matcher;
52import java.util.regex.Pattern;
53
54import eu.siacs.conversations.R;
55import eu.siacs.conversations.ui.util.StyledAttributes;
56import eu.siacs.conversations.xmpp.Jid;
57
58public class IrregularUnicodeDetector {
59
/**
 * Unicode blocks that are folded into another block before code points are grouped.
 * Currently only Latin-1 Supplement is folded into Basic Latin.
 */
private static final Map<Character.UnicodeBlock, Character.UnicodeBlock> NORMALIZATION_MAP;

/** Cache of the highlight patterns computed per JID; created with a maximum size of 4096. */
private static final LruCache<Jid, PatternTuple> CACHE = new LruCache<>(4096);

/**
 * Cyrillic letters regarded as ambiguous (presumably because they are easily confused with
 * Latin letters). A word made up exclusively of these letters is reported as irregular in
 * its entirety.
 *
 * The sixth entry is CYRILLIC SMALL LETTER QA (U+051B), written as an escape because it is
 * visually indistinguishable from the Latin letter q; a Latin q in this list would flag
 * plain Latin words such as "qq".
 */
private static final List<String> AMBIGUOUS_CYRILLIC = Arrays.asList("а","г","е","ѕ","і","\u051B","о","р","с","у");

static {
    final Map<Character.UnicodeBlock, Character.UnicodeBlock> mapping = new HashMap<>();
    mapping.put(Character.UnicodeBlock.LATIN_1_SUPPLEMENT, Character.UnicodeBlock.BASIC_LATIN);
    NORMALIZATION_MAP = Collections.unmodifiableMap(mapping);
}
69
70 private static Character.UnicodeBlock normalize(Character.UnicodeBlock in) {
71 if (NORMALIZATION_MAP.containsKey(in)) {
72 return NORMALIZATION_MAP.get(in);
73 } else {
74 return in;
75 }
76 }
77
78 public static Spannable style(Context context, Jid jid) {
79 return style(jid, StyledAttributes.getColor(context, R.attr.color_warning));
80 }
81
82 private static Spannable style(Jid jid, @ColorInt int color) {
83 PatternTuple patternTuple = find(jid);
84 SpannableStringBuilder builder = new SpannableStringBuilder();
85 if (jid.getEscapedLocal() != null && patternTuple.local != null) {
86 SpannableString local = new SpannableString(jid.getEscapedLocal());
87 colorize(local, patternTuple.local, color);
88 builder.append(local);
89 builder.append('@');
90 }
91 if (jid.getDomain() != null) {
92 String[] labels = jid.getDomain().split("\\.");
93 for (int i = 0; i < labels.length; ++i) {
94 SpannableString spannableString = new SpannableString(labels[i]);
95 colorize(spannableString, patternTuple.domain.get(i), color);
96 if (i != 0) {
97 builder.append('.');
98 }
99 builder.append(spannableString);
100 }
101 }
102 if (builder.length() != 0 && jid.getResource() != null) {
103 builder.append('/');
104 builder.append(jid.getResource());
105 }
106 return builder;
107 }
108
109 private static void colorize(SpannableString spannableString, Pattern pattern, @ColorInt int color) {
110 Matcher matcher = pattern.matcher(spannableString);
111 while (matcher.find()) {
112 if (matcher.start() < matcher.end()) {
113 spannableString.setSpan(new ForegroundColorSpan(color), matcher.start(), matcher.end(), Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
114 }
115 }
116 }
117
118 private static Map<Character.UnicodeBlock, List<String>> mapCompat(String word) {
119 Map<Character.UnicodeBlock, List<String>> map = new HashMap<>();
120 final int length = word.length();
121 for (int offset = 0; offset < length; ) {
122 final int codePoint = word.codePointAt(offset);
123 offset += Character.charCount(codePoint);
124 if (!Character.isLetter(codePoint)) {
125 continue;
126 }
127 Character.UnicodeBlock block = normalize(Character.UnicodeBlock.of(codePoint));
128 List<String> codePoints;
129 if (map.containsKey(block)) {
130 codePoints = map.get(block);
131 } else {
132 codePoints = new ArrayList<>();
133 map.put(block, codePoints);
134 }
135 codePoints.add(String.copyValueOf(Character.toChars(codePoint)));
136 }
137 return map;
138 }
139
140 @TargetApi(Build.VERSION_CODES.N)
141 private static Map<Character.UnicodeScript, List<String>> map(String word) {
142 Map<Character.UnicodeScript, List<String>> map = new HashMap<>();
143 final int length = word.length();
144 for (int offset = 0; offset < length; ) {
145 final int codePoint = word.codePointAt(offset);
146 Character.UnicodeScript script = Character.UnicodeScript.of(codePoint);
147 if (script != Character.UnicodeScript.COMMON) {
148 List<String> codePoints;
149 if (map.containsKey(script)) {
150 codePoints = map.get(script);
151 } else {
152 codePoints = new ArrayList<>();
153 map.put(script, codePoints);
154 }
155 codePoints.add(String.copyValueOf(Character.toChars(codePoint)));
156 }
157 offset += Character.charCount(codePoint);
158 }
159 return map;
160 }
161
162 private static Set<String> eliminateFirstAndGetCodePointsCompat(Map<Character.UnicodeBlock, List<String>> map) {
163 return eliminateFirstAndGetCodePoints(map, Character.UnicodeBlock.BASIC_LATIN);
164 }
165
166 @TargetApi(Build.VERSION_CODES.N)
167 private static Set<String> eliminateFirstAndGetCodePoints(Map<Character.UnicodeScript, List<String>> map) {
168 return eliminateFirstAndGetCodePoints(map, Character.UnicodeScript.COMMON);
169 }
170
171 private static <T> Set<String> eliminateFirstAndGetCodePoints(Map<T, List<String>> map, T defaultPick) {
172 T pick = defaultPick;
173 int size = 0;
174 for (Map.Entry<T, List<String>> entry : map.entrySet()) {
175 if (entry.getValue().size() > size) {
176 size = entry.getValue().size();
177 pick = entry.getKey();
178 }
179 }
180 map.remove(pick);
181 Set<String> all = new HashSet<>();
182 for (List<String> codePoints : map.values()) {
183 all.addAll(codePoints);
184 }
185 return all;
186 }
187
188 private static Set<String> findIrregularCodePoints(String word) {
189 Set<String> codePoints;
190 if (Build.VERSION.SDK_INT < Build.VERSION_CODES.N) {
191 final Map<Character.UnicodeBlock, List<String>> map = mapCompat(word);
192 final Set<String> set = asSet(map);
193 if (containsOnlyAmbiguousCyrillic(set)) {
194 return set;
195 }
196 codePoints = eliminateFirstAndGetCodePointsCompat(map);
197 } else {
198 final Map<Character.UnicodeScript, List<String>> map = map(word);
199 final Set<String> set = asSet(map);
200 if (containsOnlyAmbiguousCyrillic(set)) {
201 return set;
202 }
203 codePoints = eliminateFirstAndGetCodePoints(map);
204 }
205 return codePoints;
206 }
207
208 private static Set<String> asSet(Map<?, List<String>> map) {
209 final Set<String> flat = new HashSet<>();
210 for(List<String> value : map.values()) {
211 flat.addAll(value);
212 }
213 return flat;
214 }
215
216
217 private static boolean containsOnlyAmbiguousCyrillic(Collection<String> codePoints) {
218 for (String codePoint : codePoints) {
219 if (!AMBIGUOUS_CYRILLIC.contains(codePoint)) {
220 return false;
221 }
222 }
223 return true;
224 }
225
226 private static PatternTuple find(Jid jid) {
227 synchronized (CACHE) {
228 PatternTuple pattern = CACHE.get(jid);
229 if (pattern != null) {
230 return pattern;
231 }
232 ;
233 pattern = PatternTuple.of(jid);
234 CACHE.put(jid, pattern);
235 return pattern;
236 }
237 }
238
239 private static Pattern create(Set<String> codePoints) {
240 final StringBuilder pattern = new StringBuilder();
241 for (String codePoint : codePoints) {
242 if (pattern.length() != 0) {
243 pattern.append('|');
244 }
245 pattern.append(Pattern.quote(codePoint));
246 }
247 return Pattern.compile(pattern.toString());
248 }
249
250 private static class PatternTuple {
251 private final Pattern local;
252 private final List<Pattern> domain;
253
254 private PatternTuple(Pattern local, List<Pattern> domain) {
255 this.local = local;
256 this.domain = domain;
257 }
258
259 private static PatternTuple of(Jid jid) {
260 final Pattern localPattern;
261 if (jid.getEscapedLocal() != null) {
262 localPattern = create(findIrregularCodePoints(jid.getEscapedLocal()));
263 } else {
264 localPattern = null;
265 }
266 String domain = jid.getDomain();
267 final List<Pattern> domainPatterns = new ArrayList<>();
268 if (domain != null) {
269 for (String label : domain.split("\\.")) {
270 domainPatterns.add(create(findIrregularCodePoints(label)));
271 }
272 }
273 return new PatternTuple(localPattern, domainPatterns);
274 }
275 }
276}