1/*
2 * Copyright (c) 2018-2019, Daniel Gultsch All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without modification,
5 * are permitted provided that the following conditions are met:
6 *
7 * 1. Redistributions of source code must retain the above copyright notice, this
8 * list of conditions and the following disclaimer.
9 *
10 * 2. Redistributions in binary form must reproduce the above copyright notice,
11 * this list of conditions and the following disclaimer in the documentation and/or
12 * other materials provided with the distribution.
13 *
14 * 3. Neither the name of the copyright holder nor the names of its contributors
15 * may be used to endorse or promote products derived from this software without
16 * specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
22 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30package eu.siacs.conversations.utils;
31
32import android.annotation.TargetApi;
33import android.content.Context;
34import android.os.Build;
35import android.text.Spannable;
36import android.text.SpannableString;
37import android.text.SpannableStringBuilder;
38import android.text.style.ForegroundColorSpan;
39import android.util.LruCache;
40
41import androidx.annotation.ColorInt;
42
43import java.util.ArrayList;
44import java.util.Arrays;
45import java.util.Collection;
46import java.util.Collections;
47import java.util.HashMap;
48import java.util.HashSet;
49import java.util.List;
50import java.util.Map;
51import java.util.Set;
52import java.util.regex.Matcher;
53import java.util.regex.Pattern;
54
55import eu.siacs.conversations.R;
56import eu.siacs.conversations.ui.util.StyledAttributes;
57import eu.siacs.conversations.xmpp.Jid;
58
59public class IrregularUnicodeDetector {
60
61 private static final Map<Character.UnicodeBlock, Character.UnicodeBlock> NORMALIZATION_MAP;
62 private static final LruCache<Jid, PatternTuple> CACHE = new LruCache<>(4096);
63 private static final List<String> AMBIGUOUS_CYRILLIC = Arrays.asList("ะฐ","ะณ","ะต","ั","ั","ั","ิ","ะพ","ั","ั","ั","ั
");
64
65 static {
66 Map<Character.UnicodeBlock, Character.UnicodeBlock> temp = new HashMap<>();
67 temp.put(Character.UnicodeBlock.LATIN_1_SUPPLEMENT, Character.UnicodeBlock.BASIC_LATIN);
68 NORMALIZATION_MAP = Collections.unmodifiableMap(temp);
69 }
70
71 private static Character.UnicodeBlock normalize(Character.UnicodeBlock in) {
72 if (NORMALIZATION_MAP.containsKey(in)) {
73 return NORMALIZATION_MAP.get(in);
74 } else {
75 return in;
76 }
77 }
78
79 public static Spannable style(Context context, Jid jid) {
80 return style(jid, StyledAttributes.getColor(context, R.attr.color_warning));
81 }
82
83 private static Spannable style(Jid jid, @ColorInt int color) {
84 PatternTuple patternTuple = find(jid);
85 SpannableStringBuilder builder = new SpannableStringBuilder();
86 if (jid.getEscapedLocal() != null && patternTuple.local != null) {
87 SpannableString local = new SpannableString(jid.getEscapedLocal());
88 colorize(local, patternTuple.local, color);
89 builder.append(local);
90 builder.append('@');
91 }
92 if (jid.getDomain() != null) {
93 String[] labels = jid.getDomain().toEscapedString().split("\\.");
94 for (int i = 0; i < labels.length; ++i) {
95 SpannableString spannableString = new SpannableString(labels[i]);
96 colorize(spannableString, patternTuple.domain.get(i), color);
97 if (i != 0) {
98 builder.append('.');
99 }
100 builder.append(spannableString);
101 }
102 }
103 if (builder.length() != 0 && jid.getResource() != null) {
104 builder.append('/');
105 builder.append(jid.getResource());
106 }
107 return builder;
108 }
109
110 private static void colorize(SpannableString spannableString, Pattern pattern, @ColorInt int color) {
111 Matcher matcher = pattern.matcher(spannableString);
112 while (matcher.find()) {
113 if (matcher.start() < matcher.end()) {
114 spannableString.setSpan(new ForegroundColorSpan(color), matcher.start(), matcher.end(), Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
115 }
116 }
117 }
118
119 private static Map<Character.UnicodeBlock, List<String>> mapCompat(String word) {
120 Map<Character.UnicodeBlock, List<String>> map = new HashMap<>();
121 final int length = word.length();
122 for (int offset = 0; offset < length; ) {
123 final int codePoint = word.codePointAt(offset);
124 offset += Character.charCount(codePoint);
125 if (!Character.isLetter(codePoint)) {
126 continue;
127 }
128 Character.UnicodeBlock block = normalize(Character.UnicodeBlock.of(codePoint));
129 List<String> codePoints;
130 if (map.containsKey(block)) {
131 codePoints = map.get(block);
132 } else {
133 codePoints = new ArrayList<>();
134 map.put(block, codePoints);
135 }
136 codePoints.add(String.copyValueOf(Character.toChars(codePoint)));
137 }
138 return map;
139 }
140
141 @TargetApi(Build.VERSION_CODES.N)
142 private static Map<Character.UnicodeScript, List<String>> map(String word) {
143 Map<Character.UnicodeScript, List<String>> map = new HashMap<>();
144 final int length = word.length();
145 for (int offset = 0; offset < length; ) {
146 final int codePoint = word.codePointAt(offset);
147 Character.UnicodeScript script = Character.UnicodeScript.of(codePoint);
148 if (script != Character.UnicodeScript.COMMON) {
149 List<String> codePoints;
150 if (map.containsKey(script)) {
151 codePoints = map.get(script);
152 } else {
153 codePoints = new ArrayList<>();
154 map.put(script, codePoints);
155 }
156 codePoints.add(String.copyValueOf(Character.toChars(codePoint)));
157 }
158 offset += Character.charCount(codePoint);
159 }
160 return map;
161 }
162
163 private static Set<String> eliminateFirstAndGetCodePointsCompat(Map<Character.UnicodeBlock, List<String>> map) {
164 return eliminateFirstAndGetCodePoints(map, Character.UnicodeBlock.BASIC_LATIN);
165 }
166
167 @TargetApi(Build.VERSION_CODES.N)
168 private static Set<String> eliminateFirstAndGetCodePoints(Map<Character.UnicodeScript, List<String>> map) {
169 return eliminateFirstAndGetCodePoints(map, Character.UnicodeScript.COMMON);
170 }
171
172 private static <T> Set<String> eliminateFirstAndGetCodePoints(Map<T, List<String>> map, T defaultPick) {
173 T pick = defaultPick;
174 int size = 0;
175 for (Map.Entry<T, List<String>> entry : map.entrySet()) {
176 if (entry.getValue().size() > size) {
177 size = entry.getValue().size();
178 pick = entry.getKey();
179 }
180 }
181 map.remove(pick);
182 Set<String> all = new HashSet<>();
183 for (List<String> codePoints : map.values()) {
184 all.addAll(codePoints);
185 }
186 return all;
187 }
188
189 private static Set<String> findIrregularCodePoints(String word) {
190 Set<String> codePoints;
191 if (Build.VERSION.SDK_INT < Build.VERSION_CODES.N) {
192 final Map<Character.UnicodeBlock, List<String>> map = mapCompat(word);
193 final Set<String> set = asSet(map);
194 if (containsOnlyAmbiguousCyrillic(set)) {
195 return set;
196 }
197 codePoints = eliminateFirstAndGetCodePointsCompat(map);
198 } else {
199 final Map<Character.UnicodeScript, List<String>> map = map(word);
200 final Set<String> set = asSet(map);
201 if (containsOnlyAmbiguousCyrillic(set)) {
202 return set;
203 }
204 codePoints = eliminateFirstAndGetCodePoints(map);
205 }
206 return codePoints;
207 }
208
209 private static Set<String> asSet(Map<?, List<String>> map) {
210 final Set<String> flat = new HashSet<>();
211 for(List<String> value : map.values()) {
212 flat.addAll(value);
213 }
214 return flat;
215 }
216
217
218 private static boolean containsOnlyAmbiguousCyrillic(Collection<String> codePoints) {
219 for (String codePoint : codePoints) {
220 if (!AMBIGUOUS_CYRILLIC.contains(codePoint)) {
221 return false;
222 }
223 }
224 return true;
225 }
226
227 private static PatternTuple find(Jid jid) {
228 synchronized (CACHE) {
229 PatternTuple pattern = CACHE.get(jid);
230 if (pattern != null) {
231 return pattern;
232 }
233 pattern = PatternTuple.of(jid);
234 CACHE.put(jid, pattern);
235 return pattern;
236 }
237 }
238
239 private static Pattern create(Set<String> codePoints) {
240 final StringBuilder pattern = new StringBuilder();
241 for (String codePoint : codePoints) {
242 if (pattern.length() != 0) {
243 pattern.append('|');
244 }
245 pattern.append(Pattern.quote(codePoint));
246 }
247 return Pattern.compile(pattern.toString());
248 }
249
250 private static class PatternTuple {
251 private final Pattern local;
252 private final List<Pattern> domain;
253
254 private PatternTuple(Pattern local, List<Pattern> domain) {
255 this.local = local;
256 this.domain = domain;
257 }
258
259 private static PatternTuple of(Jid jid) {
260 final Pattern localPattern;
261 if (jid.getEscapedLocal() != null) {
262 localPattern = create(findIrregularCodePoints(jid.getEscapedLocal()));
263 } else {
264 localPattern = null;
265 }
266 String domain = jid.getDomain().toEscapedString();
267 final List<Pattern> domainPatterns = new ArrayList<>();
268 if (domain != null) {
269 for (String label : domain.split("\\.")) {
270 domainPatterns.add(create(findIrregularCodePoints(label)));
271 }
272 }
273 return new PatternTuple(localPattern, domainPatterns);
274 }
275 }
276}