Emoticons: detect keycaps

Daniel Gultsch created

Change summary

src/main/java/eu/siacs/conversations/utils/Emoticons.java | 64 +++++++-
1 file changed, 53 insertions(+), 11 deletions(-)

Detailed changes

src/main/java/eu/siacs/conversations/utils/Emoticons.java 🔗

@@ -1,8 +1,13 @@
 package eu.siacs.conversations.utils;
 
+import android.util.Log;
+
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 
+import eu.siacs.conversations.Config;
+
 public class Emoticons {
 
 	private static final UnicodeRange MISC_SYMBOLS_AND_PICTOGRAPHS = new UnicodeRange(0x1F300,0x1F5FF);
@@ -12,11 +17,22 @@ public class Emoticons {
 	private static final UnicodeRange MISC_SYMBOLS = new UnicodeRange(0x2600,0x26FF);
 	private static final UnicodeRange DINGBATS = new UnicodeRange(0x2700,0x27BF);
 	private static final UnicodeRange ENCLOSED_ALPHANUMERIC_SUPPLEMENT = new UnicodeRange(0x1F100,0x1F1FF);
+	private static final UnicodeRange ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = new UnicodeRange(0x1F200,0x1F2FF);
 	private static final UnicodeRange REGIONAL_INDICATORS = new UnicodeRange(0x1F1E6,0x1F1FF);
 	private static final UnicodeRange GEOMETRIC_SHAPES = new UnicodeRange(0x25A0,0x25FF);
 	private static final UnicodeRange LATIN_SUPPLEMENT = new UnicodeRange(0x80,0xFF);
 	private static final UnicodeRange MISC_TECHNICAL = new UnicodeRange(0x2300,0x23FF);
-	private static final UnicodeBlocks SYMBOLIZE = new UnicodeBlocks(GEOMETRIC_SHAPES, LATIN_SUPPLEMENT);
+	private static final UnicodeList CYK_SYMBOLS_AND_PUNCTUATION = new UnicodeList(0x3030,0x303D);
+	private static final UnicodeList LETTERLIKE_SYMBOLS = new UnicodeList(0x2122,0x2139);
+
+	private static final UnicodeBlocks KEYCAP_COMBINEABLE = new UnicodeBlocks(new UnicodeList(0x23),new UnicodeList(0x2A),new UnicodeRange(0x30,0x39));
+
+	private static final UnicodeBlocks SYMBOLIZE = new UnicodeBlocks(
+			GEOMETRIC_SHAPES,
+			LATIN_SUPPLEMENT,
+			CYK_SYMBOLS_AND_PUNCTUATION,
+			LETTERLIKE_SYMBOLS,
+			KEYCAP_COMBINEABLE);
 	private static final UnicodeBlocks EMOJIS = new UnicodeBlocks(
 			MISC_SYMBOLS_AND_PICTOGRAPHS,
 			SUPPLEMENTAL_SYMBOLS,
@@ -25,9 +41,11 @@ public class Emoticons {
 			MISC_SYMBOLS,
 			DINGBATS,
 			ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
+			ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
 			MISC_TECHNICAL);
 	private static final int ZWJ = 0x200D;
 	private static final int VARIATION_16 = 0xFE0F;
+	private static final int COMBINING_ENCLOSING_KEYCAP = 0x20E3;
 	private static final UnicodeRange FITZPATRICK = new UnicodeRange(0x1F3FB,0x1F3FF);
 
 	private static List<Symbol> parse(String input) {
@@ -88,22 +106,25 @@ public class Emoticons {
 				}
 			} else {
 				int previous = codepoints.get(codepoints.size() -1);
-				if (SYMBOLIZE.contains(previous)) {
+				if (COMBINING_ENCLOSING_KEYCAP == codepoint) {
+					add = KEYCAP_COMBINEABLE.contains(previous) || previous == VARIATION_16;
+				} else if (SYMBOLIZE.contains(previous)) {
 					add = codepoint == VARIATION_16;
 				} else if (REGIONAL_INDICATORS.contains(previous) && REGIONAL_INDICATORS.contains(codepoint)) {
 					add = codepoints.size() == 1;
 				} else if (previous == VARIATION_16) {
 					add = isMerger(codepoint);
 				} else if (FITZPATRICK.contains(previous)) {
-					add = codepoint == ZWJ || EMOJIS.contains(codepoint);
+					add = codepoint == ZWJ;
 				} else if (ZWJ == previous) {
-					add = EMOJIS.contains(codepoint) || FITZPATRICK.contains(codepoint);
+					add = EMOJIS.contains(codepoint);
 				} else if (isMerger(codepoint)) {
 					add = true;
 				} else if (codepoint == VARIATION_16 && EMOJIS.contains(previous)) {
 					add = true;
 				}
 			}
+			Log.d(Config.LOGTAG,"code point "+String.format("%H",codepoint)+" added="+add);
 			if (add) {
 				codepoints.add(codepoint);
 				return true;
@@ -119,21 +140,24 @@ public class Emoticons {
 		public Symbol build() {
 			if (codepoints.size() > 0 && SYMBOLIZE.contains(codepoints.get(codepoints.size() - 1))) {
 				return Symbol.NON_EMOJI;
+			} else if (codepoints.size() > 1 && KEYCAP_COMBINEABLE.contains(codepoints.get(0)) && codepoints.get(codepoints.size() - 1) != COMBINING_ENCLOSING_KEYCAP) {
+				return Symbol.NON_EMOJI;
 			}
 			return codepoints.size() == 0 ? Symbol.NON_EMOJI : Symbol.EMOJI;
 		}
 	}
 
-	public static class UnicodeBlocks {
-		final UnicodeRange[] ranges;
+	public static class UnicodeBlocks implements UnicodeSet {
+		final UnicodeSet[] unicodeSets;
 
-		public UnicodeBlocks(UnicodeRange... ranges) {
-			this.ranges = ranges;
+		public UnicodeBlocks(UnicodeSet... sets) {
+			this.unicodeSets = sets;
 		}
 
+		@Override
 		public boolean contains(int codepoint) {
-			for(UnicodeRange range : ranges) {
-				if (range.contains(codepoint)) {
+			for(UnicodeSet unicodeSet : unicodeSets) {
+				if (unicodeSet.contains(codepoint)) {
 					return true;
 				}
 			}
@@ -141,8 +165,26 @@ public class Emoticons {
 		}
 	}
 
+	public interface UnicodeSet {
+		boolean contains(int codepoint);
+	}
+
+	public static class UnicodeList implements UnicodeSet {
+
+		private final List<Integer> list;
+
+		public UnicodeList(Integer... codes) {
+			this.list = Arrays.asList(codes);
+		}
+
+		@Override
+		public boolean contains(int codepoint) {
+			return this.list.contains(codepoint);
+		}
+	}
+
 
-	public static class UnicodeRange {
+	public static class UnicodeRange implements UnicodeSet {
 
 		private final int lower;
 		private final int upper;