warn when using _only_ ambiguous cyrillic

Daniel Gultsch created

Change summary

src/main/java/eu/siacs/conversations/utils/IrregularUnicodeDetector.java | 37 
1 file changed, 34 insertions(+), 3 deletions(-)

Detailed changes

src/main/java/eu/siacs/conversations/utils/IrregularUnicodeDetector.java 🔗

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, Daniel Gultsch All rights reserved.
+ * Copyright (c) 2018-2019, Daniel Gultsch All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification,
  * are permitted provided that the following conditions are met:
@@ -40,6 +40,8 @@ import android.text.style.ForegroundColorSpan;
 import android.util.LruCache;
 
 import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -57,6 +59,7 @@ public class IrregularUnicodeDetector {
 
 	private static final Map<Character.UnicodeBlock, Character.UnicodeBlock> NORMALIZATION_MAP;
 	private static final LruCache<Jid, PatternTuple> CACHE = new LruCache<>(4096);
+	private static final List<String> AMBIGUOUS_CYRILLIC = Arrays.asList("а","г","е","ѕ","і","q","о","р","с","у");
 
 	static {
 		Map<Character.UnicodeBlock, Character.UnicodeBlock> temp = new HashMap<>();
@@ -185,13 +188,41 @@ public class IrregularUnicodeDetector {
 	private static Set<String> findIrregularCodePoints(String word) {
 		Set<String> codePoints;
 		if (Build.VERSION.SDK_INT < Build.VERSION_CODES.N) {
-			codePoints = eliminateFirstAndGetCodePointsCompat(mapCompat(word));
+			final Map<Character.UnicodeBlock, List<String>> map = mapCompat(word); // held in a local: flattened below and also passed on unchanged
+			final Set<String> set = asSet(map); // every string from all of the map's value lists
+			if (containsOnlyAmbiguousCyrillic(set)) { // each entry is listed in AMBIGUOUS_CYRILLIC
+				return set; // short-circuit: hand back all of them, skipping eliminateFirstAndGetCodePointsCompat
+			}
+			codePoints = eliminateFirstAndGetCodePointsCompat(map);
 		} else {
-			codePoints = eliminateFirstAndGetCodePoints(map(word));
+			final Map<Character.UnicodeScript, List<String>> map = map(word); // same flow as the compat branch, keyed by UnicodeScript
+			final Set<String> set = asSet(map); // every string from all of the map's value lists
+			if (containsOnlyAmbiguousCyrillic(set)) { // each entry is listed in AMBIGUOUS_CYRILLIC
+				return set; // short-circuit: hand back all of them, skipping eliminateFirstAndGetCodePoints
+			}
+			codePoints = eliminateFirstAndGetCodePoints(map);
 		}
 		return codePoints;
 	}
 
+	private static Set<String> asSet(Map<?, List<String>> map) { // flattens all value lists of the map into one set; the keys are ignored
+		final Set<String> flat = new HashSet<>(); // a set, so a string present in several lists is kept once
+		for(List<String> value : map.values()) {
+			flat.addAll(value);
+		}
+		return flat; // newly created, mutable HashSet
+	}
+
+
+	private static boolean containsOnlyAmbiguousCyrillic(Collection<String> codePoints) { // true when every element is listed in AMBIGUOUS_CYRILLIC
+		for (String codePoint : codePoints) {
+			if (!AMBIGUOUS_CYRILLIC.contains(codePoint)) { // List.contains: equals-based scan of the constant list
+				return false; // the first entry outside the list decides the result
+			}
+		}
+		return true; // also reached for an empty collection, since no element fails the check
+	}
+
 	private static PatternTuple find(Jid jid) {
 		synchronized (CACHE) {
 			PatternTuple pattern = CACHE.get(jid);