IrregularUnicodeDetector.java

  1/*
  2 * Copyright (c) 2018-2019, Daniel Gultsch All rights reserved.
  3 *
  4 * Redistribution and use in source and binary forms, with or without modification,
  5 * are permitted provided that the following conditions are met:
  6 *
  7 * 1. Redistributions of source code must retain the above copyright notice, this
  8 * list of conditions and the following disclaimer.
  9 *
 10 * 2. Redistributions in binary form must reproduce the above copyright notice,
 11 * this list of conditions and the following disclaimer in the documentation and/or
 12 * other materials provided with the distribution.
 13 *
 14 * 3. Neither the name of the copyright holder nor the names of its contributors
 15 * may be used to endorse or promote products derived from this software without
 16 * specific prior written permission.
 17 *
 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 21 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 22 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 25 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 28 */
 29
 30package eu.siacs.conversations.utils;
 31
 32import android.annotation.TargetApi;
 33import android.content.Context;
 34import android.os.Build;
 35import android.text.Spannable;
 36import android.text.SpannableString;
 37import android.text.SpannableStringBuilder;
 38import android.text.style.ForegroundColorSpan;
 39import android.util.LruCache;
 40
 41import androidx.annotation.ColorInt;
 42
 43import com.google.android.material.color.MaterialColors;
 44
 45import java.util.ArrayList;
 46import java.util.Arrays;
 47import java.util.Collection;
 48import java.util.Collections;
 49import java.util.HashMap;
 50import java.util.HashSet;
 51import java.util.List;
 52import java.util.Map;
 53import java.util.Set;
 54import java.util.regex.Matcher;
 55import java.util.regex.Pattern;
 56
 57import eu.siacs.conversations.R;
 58import eu.siacs.conversations.xmpp.Jid;
 59
 60public class IrregularUnicodeDetector {
 61
 62	private static final Map<Character.UnicodeBlock, Character.UnicodeBlock> NORMALIZATION_MAP;
 63	private static final LruCache<Jid, PatternTuple> CACHE = new LruCache<>(4096);
 64	private static final List<String> AMBIGUOUS_CYRILLIC = Arrays.asList("а","г","е","ѕ","і","ј","ķ","ԛ","о","р","с","у","х");
 65
 66	static {
 67		Map<Character.UnicodeBlock, Character.UnicodeBlock> temp = new HashMap<>();
 68		temp.put(Character.UnicodeBlock.LATIN_1_SUPPLEMENT, Character.UnicodeBlock.BASIC_LATIN);
 69		NORMALIZATION_MAP = Collections.unmodifiableMap(temp);
 70	}
 71
 72	private static Character.UnicodeBlock normalize(Character.UnicodeBlock in) {
 73		if (NORMALIZATION_MAP.containsKey(in)) {
 74			return NORMALIZATION_MAP.get(in);
 75		} else {
 76			return in;
 77		}
 78	}
 79
 80	public static Spannable style(final Context context, Jid jid) {
 81		return style(jid, MaterialColors.getColor(context, com.google.android.material.R.attr.colorError,"colorError not found"));
 82	}
 83
 84	private static Spannable style(Jid jid, @ColorInt int color) {
 85		PatternTuple patternTuple = find(jid);
 86		SpannableStringBuilder builder = new SpannableStringBuilder();
 87		if (jid.getEscapedLocal() != null && patternTuple.local != null) {
 88			SpannableString local = new SpannableString(jid.getEscapedLocal());
 89			colorize(local, patternTuple.local, color);
 90			builder.append(local);
 91			builder.append('@');
 92		}
 93		if (jid.getDomain() != null) {
 94			String[] labels = jid.getDomain().toEscapedString().split("\\.");
 95			for (int i = 0; i < labels.length; ++i) {
 96				SpannableString spannableString = new SpannableString(labels[i]);
 97				if (patternTuple.domain.size() > i) {
 98					colorize(spannableString, patternTuple.domain.get(i), color);
 99				}
100				if (i != 0) {
101					builder.append('.');
102				}
103				builder.append(spannableString);
104			}
105		}
106		if (builder.length() != 0 && jid.getResource() != null) {
107			builder.append('/');
108			builder.append(jid.getResource());
109		}
110		return builder;
111	}
112
113	private static void colorize(SpannableString spannableString, Pattern pattern, @ColorInt int color) {
114		Matcher matcher = pattern.matcher(spannableString);
115		while (matcher.find()) {
116			if (matcher.start() < matcher.end()) {
117				spannableString.setSpan(new ForegroundColorSpan(color), matcher.start(), matcher.end(), Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
118			}
119		}
120	}
121
122	private static Map<Character.UnicodeBlock, List<String>> mapCompat(String word) {
123		Map<Character.UnicodeBlock, List<String>> map = new HashMap<>();
124		final int length = word.length();
125		for (int offset = 0; offset < length; ) {
126			final int codePoint = word.codePointAt(offset);
127			offset += Character.charCount(codePoint);
128			if (!Character.isLetter(codePoint)) {
129				continue;
130			}
131			Character.UnicodeBlock block = normalize(Character.UnicodeBlock.of(codePoint));
132			List<String> codePoints;
133			if (map.containsKey(block)) {
134				codePoints = map.get(block);
135			} else {
136				codePoints = new ArrayList<>();
137				map.put(block, codePoints);
138			}
139			codePoints.add(String.copyValueOf(Character.toChars(codePoint)));
140		}
141		return map;
142	}
143
144	@TargetApi(Build.VERSION_CODES.N)
145	private static Map<Character.UnicodeScript, List<String>> map(String word) {
146		Map<Character.UnicodeScript, List<String>> map = new HashMap<>();
147		final int length = word.length();
148		for (int offset = 0; offset < length; ) {
149			final int codePoint = word.codePointAt(offset);
150			Character.UnicodeScript script = Character.UnicodeScript.of(codePoint);
151			if (script != Character.UnicodeScript.COMMON) {
152				List<String> codePoints;
153				if (map.containsKey(script)) {
154					codePoints = map.get(script);
155				} else {
156					codePoints = new ArrayList<>();
157					map.put(script, codePoints);
158				}
159				codePoints.add(String.copyValueOf(Character.toChars(codePoint)));
160			}
161			offset += Character.charCount(codePoint);
162		}
163		return map;
164	}
165
166	private static Set<String> eliminateFirstAndGetCodePointsCompat(Map<Character.UnicodeBlock, List<String>> map) {
167		return eliminateFirstAndGetCodePoints(map, Character.UnicodeBlock.BASIC_LATIN);
168	}
169
170	@TargetApi(Build.VERSION_CODES.N)
171	private static Set<String> eliminateFirstAndGetCodePoints(Map<Character.UnicodeScript, List<String>> map) {
172		return eliminateFirstAndGetCodePoints(map, Character.UnicodeScript.COMMON);
173	}
174
175	private static <T> Set<String> eliminateFirstAndGetCodePoints(Map<T, List<String>> map, T defaultPick) {
176		T pick = defaultPick;
177		int size = 0;
178		for (Map.Entry<T, List<String>> entry : map.entrySet()) {
179			if (entry.getValue().size() > size) {
180				size = entry.getValue().size();
181				pick = entry.getKey();
182			}
183		}
184		map.remove(pick);
185		Set<String> all = new HashSet<>();
186		for (List<String> codePoints : map.values()) {
187			all.addAll(codePoints);
188		}
189		return all;
190	}
191
192	private static Set<String> findIrregularCodePoints(String word) {
193		Set<String> codePoints;
194		if (Build.VERSION.SDK_INT < Build.VERSION_CODES.N) {
195			final Map<Character.UnicodeBlock, List<String>> map = mapCompat(word);
196			final Set<String> set = asSet(map);
197			if (containsOnlyAmbiguousCyrillic(set)) {
198				return set;
199			}
200			codePoints = eliminateFirstAndGetCodePointsCompat(map);
201		} else {
202			final Map<Character.UnicodeScript, List<String>> map = map(word);
203			final Set<String> set = asSet(map);
204			if (containsOnlyAmbiguousCyrillic(set)) {
205				return set;
206			}
207			codePoints = eliminateFirstAndGetCodePoints(map);
208		}
209		return codePoints;
210	}
211
212	private static Set<String> asSet(Map<?, List<String>> map) {
213		final Set<String> flat = new HashSet<>();
214		for(List<String> value : map.values()) {
215			flat.addAll(value);
216		}
217		return flat;
218	}
219
220
221	private static boolean containsOnlyAmbiguousCyrillic(Collection<String> codePoints) {
222		for (String codePoint : codePoints) {
223			if (!AMBIGUOUS_CYRILLIC.contains(codePoint)) {
224				return false;
225			}
226		}
227		return true;
228	}
229
230	private static PatternTuple find(Jid jid) {
231		synchronized (CACHE) {
232			PatternTuple pattern = CACHE.get(jid);
233			if (pattern != null) {
234				return pattern;
235			}
236            pattern = PatternTuple.of(jid);
237			CACHE.put(jid, pattern);
238			return pattern;
239		}
240	}
241
242	private static Pattern create(Set<String> codePoints) {
243		final StringBuilder pattern = new StringBuilder();
244		for (String codePoint : codePoints) {
245			if (pattern.length() != 0) {
246				pattern.append('|');
247			}
248			pattern.append(Pattern.quote(codePoint));
249		}
250		return Pattern.compile(pattern.toString());
251	}
252
253	private static class PatternTuple {
254		private final Pattern local;
255		private final List<Pattern> domain;
256
257		private PatternTuple(Pattern local, List<Pattern> domain) {
258			this.local = local;
259			this.domain = domain;
260		}
261
262		private static PatternTuple of(Jid jid) {
263			final Pattern localPattern;
264			if (jid.getEscapedLocal() != null) {
265				localPattern = create(findIrregularCodePoints(jid.getEscapedLocal()));
266			} else {
267				localPattern = null;
268			}
269			String domain = jid.getDomain().toEscapedString();
270			final List<Pattern> domainPatterns = new ArrayList<>();
271			if (domain != null) {
272				for (String label : domain.split("\\.")) {
273					domainPatterns.add(create(findIrregularCodePoints(label)));
274				}
275			}
276			return new PatternTuple(localPattern, domainPatterns);
277		}
278	}
279}