Emoticons.java

  1package eu.siacs.conversations.utils;
  2
  3import java.util.ArrayList;
  4import java.util.Arrays;
  5import java.util.List;
  6
  7public class Emoticons {
  8
  9	private static final UnicodeRange MISC_SYMBOLS_AND_PICTOGRAPHS = new UnicodeRange(0x1F300,0x1F5FF);
 10	private static final UnicodeRange SUPPLEMENTAL_SYMBOLS = new UnicodeRange(0x1F900,0x1F9FF);
 11	private static final UnicodeRange EMOTICONS = new UnicodeRange(0x1F600,0x1F64F);
 12	private static final UnicodeRange TRANSPORT_SYMBOLS = new UnicodeRange(0x1F680,0x1F6FF);
 13	private static final UnicodeRange MISC_SYMBOLS = new UnicodeRange(0x2600,0x26FF);
 14	private static final UnicodeRange DINGBATS = new UnicodeRange(0x2700,0x27BF);
 15	private static final UnicodeRange ENCLOSED_ALPHANUMERIC_SUPPLEMENT = new UnicodeRange(0x1F100,0x1F1FF);
 16	private static final UnicodeRange ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = new UnicodeRange(0x1F200,0x1F2FF);
 17	private static final UnicodeRange REGIONAL_INDICATORS = new UnicodeRange(0x1F1E6,0x1F1FF);
 18	private static final UnicodeRange GEOMETRIC_SHAPES = new UnicodeRange(0x25A0,0x25FF);
 19	private static final UnicodeRange LATIN_SUPPLEMENT = new UnicodeRange(0x80,0xFF);
 20	private static final UnicodeRange MISC_TECHNICAL = new UnicodeRange(0x2300,0x23FF);
 21	private static final UnicodeRange TAGS = new UnicodeRange(0xE0020,0xE007F);
 22	private static final UnicodeList CYK_SYMBOLS_AND_PUNCTUATION = new UnicodeList(0x3030,0x303D);
 23	private static final UnicodeList LETTERLIKE_SYMBOLS = new UnicodeList(0x2122,0x2139);
 24
 25	private static final UnicodeBlocks KEYCAP_COMBINEABLE = new UnicodeBlocks(new UnicodeList(0x23),new UnicodeList(0x2A),new UnicodeRange(0x30,0x39));
 26
 27	private static final UnicodeBlocks SYMBOLIZE = new UnicodeBlocks(
 28			GEOMETRIC_SHAPES,
 29			LATIN_SUPPLEMENT,
 30			CYK_SYMBOLS_AND_PUNCTUATION,
 31			LETTERLIKE_SYMBOLS,
 32			KEYCAP_COMBINEABLE);
 33	private static final UnicodeBlocks EMOJIS = new UnicodeBlocks(
 34			MISC_SYMBOLS_AND_PICTOGRAPHS,
 35			SUPPLEMENTAL_SYMBOLS,
 36			EMOTICONS,
 37			TRANSPORT_SYMBOLS,
 38			MISC_SYMBOLS,
 39			DINGBATS,
 40			ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
 41			ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
 42			MISC_TECHNICAL);
 43
 44	private static final int ZWJ = 0x200D;
 45	private static final int VARIATION_16 = 0xFE0F;
 46	private static final int COMBINING_ENCLOSING_KEYCAP = 0x20E3;
 47	private static final int BLACK_FLAG = 0x1F3F4;
 48	private static final UnicodeRange FITZPATRICK = new UnicodeRange(0x1F3FB,0x1F3FF);
 49
 50	private static List<Symbol> parse(String input) {
 51		List<Symbol> symbols = new ArrayList<>();
 52		Builder builder = new Builder();
 53		boolean needsFinalBuild = false;
 54		for (int cp, i = 0; i < input.length(); i += Character.charCount(cp)) {
 55			cp = input.codePointAt(i);
 56			if (builder.offer(cp)) {
 57				needsFinalBuild = true;
 58			} else {
 59				symbols.add(builder.build());
 60				builder = new Builder();
 61				if (builder.offer(cp)) {
 62					needsFinalBuild = true;
 63				}
 64			}
 65		}
 66		if (needsFinalBuild) {
 67			symbols.add(builder.build());
 68		}
 69		return symbols;
 70	}
 71
 72	public static boolean isEmoji(String input) {
 73		List<Symbol> symbols = parse(input);
 74		return symbols.size() == 1 && symbols.get(0) == Symbol.EMOJI;
 75	}
 76
 77	public static boolean isOnlyEmoji(String input) {
 78		List<Symbol> symbols = parse(input);
 79		for(Symbol symbol : symbols) {
 80			if (symbol == Symbol.NON_EMOJI) {
 81				return false;
 82			}
 83		}
 84		return symbols.size() > 0;
 85	}
 86
 87	private enum Symbol {
 88		EMOJI, NON_EMOJI
 89	}
 90
 91
 92	private static class Builder {
 93		private final List<Integer> codepoints = new ArrayList<>();
 94
 95
 96		public boolean offer(int codepoint) {
 97			boolean add = false;
 98			if (this.codepoints.size() == 0) {
 99				if (SYMBOLIZE.contains(codepoint)) {
100					add = true;
101				} else if (REGIONAL_INDICATORS.contains(codepoint)) {
102					add = true;
103				} else if (EMOJIS.contains(codepoint) && !FITZPATRICK.contains(codepoint) && codepoint != ZWJ) {
104					add = true;
105				}
106			} else {
107				int previous = codepoints.get(codepoints.size() -1);
108				if (codepoints.get(0) == BLACK_FLAG) {
109					add = TAGS.contains(codepoint);
110				} else if (COMBINING_ENCLOSING_KEYCAP == codepoint) {
111					add = KEYCAP_COMBINEABLE.contains(previous) || previous == VARIATION_16;
112				} else if (SYMBOLIZE.contains(previous)) {
113					add = codepoint == VARIATION_16;
114				} else if (REGIONAL_INDICATORS.contains(previous) && REGIONAL_INDICATORS.contains(codepoint)) {
115					add = codepoints.size() == 1;
116				} else if (previous == VARIATION_16) {
117					add = isMerger(codepoint);
118				} else if (FITZPATRICK.contains(previous)) {
119					add = codepoint == ZWJ;
120				} else if (ZWJ == previous) {
121					add = EMOJIS.contains(codepoint);
122				} else if (isMerger(codepoint)) {
123					add = true;
124				} else if (codepoint == VARIATION_16 && EMOJIS.contains(previous)) {
125					add = true;
126				}
127			}
128			if (add) {
129				codepoints.add(codepoint);
130				return true;
131			} else {
132				return false;
133			}
134		}
135
136		private static boolean isMerger(int codepoint) {
137			return codepoint == ZWJ || FITZPATRICK.contains(codepoint);
138		}
139
140		public Symbol build() {
141			if (codepoints.size() > 0 && SYMBOLIZE.contains(codepoints.get(codepoints.size() - 1))) {
142				return Symbol.NON_EMOJI;
143			} else if (codepoints.size() > 1 && KEYCAP_COMBINEABLE.contains(codepoints.get(0)) && codepoints.get(codepoints.size() - 1) != COMBINING_ENCLOSING_KEYCAP) {
144				return Symbol.NON_EMOJI;
145			}
146			return codepoints.size() == 0 ? Symbol.NON_EMOJI : Symbol.EMOJI;
147		}
148	}
149
150	public static class UnicodeBlocks implements UnicodeSet {
151		final UnicodeSet[] unicodeSets;
152
153		public UnicodeBlocks(UnicodeSet... sets) {
154			this.unicodeSets = sets;
155		}
156
157		@Override
158		public boolean contains(int codepoint) {
159			for(UnicodeSet unicodeSet : unicodeSets) {
160				if (unicodeSet.contains(codepoint)) {
161					return true;
162				}
163			}
164			return false;
165		}
166	}
167
168	public interface UnicodeSet {
169		boolean contains(int codepoint);
170	}
171
172	public static class UnicodeList implements UnicodeSet {
173
174		private final List<Integer> list;
175
176		public UnicodeList(Integer... codes) {
177			this.list = Arrays.asList(codes);
178		}
179
180		@Override
181		public boolean contains(int codepoint) {
182			return this.list.contains(codepoint);
183		}
184	}
185
186
187	public static class UnicodeRange implements UnicodeSet {
188
189		private final int lower;
190		private final int upper;
191
192		UnicodeRange(int lower, int upper) {
193			this.lower = lower;
194			this.upper = upper;
195		}
196
197		public boolean contains(int codePoint) {
198			return codePoint >= lower && codePoint <= upper;
199		}
200	}
201}