Emoticons.java

  1/*
  2 * Copyright (c) 2017, Daniel Gultsch All rights reserved.
  3 *
  4 * Redistribution and use in source and binary forms, with or without modification,
  5 * are permitted provided that the following conditions are met:
  6 *
  7 * 1. Redistributions of source code must retain the above copyright notice, this
  8 * list of conditions and the following disclaimer.
  9 *
 10 * 2. Redistributions in binary form must reproduce the above copyright notice,
 11 * this list of conditions and the following disclaimer in the documentation and/or
 12 * other materials provided with the distribution.
 13 *
 14 * 3. Neither the name of the copyright holder nor the names of its contributors
 15 * may be used to endorse or promote products derived from this software without
 16 * specific prior written permission.
 17 *
 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 21 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 22 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 25 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 28 */
 29
 30package eu.siacs.conversations.utils;
 31
 32import android.util.LruCache;
 33
 34import java.util.ArrayList;
 35import java.util.Arrays;
 36import java.util.HashMap;
 37import java.util.HashSet;
 38import java.util.List;
 39import java.util.regex.Pattern;
 40
 41public class Emoticons {
 42
	// Unicode blocks (inclusive code point ranges) that the parser consults when grouping code points.
	private static final UnicodeRange MISC_SYMBOLS_AND_PICTOGRAPHS = new UnicodeRange(0x1F300,0x1F5FF);
	private static final UnicodeRange SUPPLEMENTAL_SYMBOLS = new UnicodeRange(0x1F900,0x1F9FF);
	private static final UnicodeRange EMOTICONS = new UnicodeRange(0x1F600,0x1F64F);
	private static final UnicodeRange TRANSPORT_SYMBOLS = new UnicodeRange(0x1F680,0x1F6FF);
	private static final UnicodeRange MISC_SYMBOLS = new UnicodeRange(0x2600,0x26FF);
	private static final UnicodeRange DINGBATS = new UnicodeRange(0x2700,0x27BF);
	private static final UnicodeRange ENCLOSED_ALPHANUMERIC_SUPPLEMENT = new UnicodeRange(0x1F100,0x1F1FF);
	private static final UnicodeRange ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = new UnicodeRange(0x1F200,0x1F2FF);
	// Sub-range of ENCLOSED_ALPHANUMERIC_SUPPLEMENT; Builder.offer merges exactly two of these into one symbol.
	private static final UnicodeRange REGIONAL_INDICATORS = new UnicodeRange(0x1F1E6,0x1F1FF);
	private static final UnicodeRange GEOMETRIC_SHAPES = new UnicodeRange(0x25A0,0x25FF);
	private static final UnicodeRange LATIN_SUPPLEMENT = new UnicodeRange(0x80,0xFF);
	private static final UnicodeRange MISC_TECHNICAL = new UnicodeRange(0x2300,0x23FF);
	// Tag characters; Builder.offer only accepts them in a sequence that starts with BLACK_FLAG.
	private static final UnicodeRange TAGS = new UnicodeRange(0xE0020,0xE007F);
	// NOTE: the two UnicodeList constants below hold two individual code points each, not a range.
	private static final UnicodeList CYK_SYMBOLS_AND_PUNCTUATION = new UnicodeList(0x3030,0x303D);
	private static final UnicodeList LETTERLIKE_SYMBOLS = new UnicodeList(0x2122,0x2139);

	// '#', '*' and the ASCII digits 0-9: the code points that may be followed by COMBINING_ENCLOSING_KEYCAP.
	private static final UnicodeBlocks KEYCAP_COMBINEABLE = new UnicodeBlocks(new UnicodeList(0x23),new UnicodeList(0x2A),new UnicodeRange(0x30,0x39));

	// Code points that may start a sequence but only count as emoji when followed by VARIATION_16
	// (or, for keycap bases, when the sequence ends in COMBINING_ENCLOSING_KEYCAP); see Builder.build.
	private static final UnicodeBlocks SYMBOLIZE = new UnicodeBlocks(
			GEOMETRIC_SHAPES,
			LATIN_SUPPLEMENT,
			CYK_SYMBOLS_AND_PUNCTUATION,
			LETTERLIKE_SYMBOLS,
			KEYCAP_COMBINEABLE);
	// Code points that may start an emoji sequence (Fitzpatrick modifiers excepted) or follow a ZWJ.
	private static final UnicodeBlocks EMOJIS = new UnicodeBlocks(
			MISC_SYMBOLS_AND_PICTOGRAPHS,
			SUPPLEMENTAL_SYMBOLS,
			EMOTICONS,
			TRANSPORT_SYMBOLS,
			MISC_SYMBOLS,
			DINGBATS,
			ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
			ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
			MISC_TECHNICAL);

	// Once generatePattern has seen this many emoji symbols it returns an empty pattern instead.
	private static final int MAX_EMOIJS = 42;

	// Zero width joiner (U+200D).
	private static final int ZWJ = 0x200D;
	// Variation selector-16 (U+FE0F).
	private static final int VARIATION_16 = 0xFE0F;
	// Combining enclosing keycap (U+20E3).
	private static final int COMBINING_ENCLOSING_KEYCAP = 0x20E3;
	// Waving black flag (U+1F3F4); a sequence starting with it may only be extended by TAGS code points.
	private static final int BLACK_FLAG = 0x1F3F4;
	// Emoji modifiers U+1F3FB..U+1F3FF; they never start a sequence, only extend one.
	private static final UnicodeRange FITZPATRICK = new UnicodeRange(0x1F3FB,0x1F3FF);

	// Maps input text to the pattern generated for it; constructed with a size limit of 256.
	private static final LruCache<CharSequence,Pattern> CACHE = new LruCache<>(256);
 87
 88	private static List<Symbol> parse(String input) {
 89		List<Symbol> symbols = new ArrayList<>();
 90		Builder builder = new Builder();
 91		boolean needsFinalBuild = false;
 92		for (int cp, i = 0; i < input.length(); i += Character.charCount(cp)) {
 93			cp = input.codePointAt(i);
 94			if (builder.offer(cp)) {
 95				needsFinalBuild = true;
 96			} else {
 97				symbols.add(builder.build());
 98				builder = new Builder();
 99				if (builder.offer(cp)) {
100					needsFinalBuild = true;
101				}
102			}
103		}
104		if (needsFinalBuild) {
105			symbols.add(builder.build());
106		}
107		return symbols;
108	}
109
110	public static Pattern getEmojiPattern(CharSequence input) {
111		Pattern pattern = CACHE.get(input);
112		if (pattern == null) {
113			pattern = generatePattern(input);
114			CACHE.put(input, pattern);
115		}
116		return pattern;
117	}
118
119	private static Pattern generatePattern(CharSequence input) {
120		final HashSet<String> emojis = new HashSet<>();
121		int i = 0;
122		for(Symbol symbol : parse(input.toString())) {
123			if (symbol instanceof Emoji) {
124				emojis.add(symbol.toString());
125				if (++i >= MAX_EMOIJS) {
126					return Pattern.compile("");
127				}
128			}
129		}
130		final StringBuilder pattern = new StringBuilder();
131		for(String emoji : emojis) {
132			if (pattern.length() != 0) {
133				pattern.append('|');
134			}
135			pattern.append(Pattern.quote(emoji));
136		}
137		return Pattern.compile(pattern.toString());
138	}
139
140	public static boolean isEmoji(String input) {
141		List<Symbol> symbols = parse(input);
142		return symbols.size() == 1 && symbols.get(0).isEmoji();
143	}
144
145	public static boolean isOnlyEmoji(String input) {
146		List<Symbol> symbols = parse(input);
147		for(Symbol symbol : symbols) {
148			if (!symbol.isEmoji()) {
149				return false;
150			}
151		}
152		return symbols.size() > 0;
153	}
154
155	private static abstract class Symbol {
156
157		private final String value;
158
159		public Symbol(List<Integer> codepoints) {
160			StringBuilder builder = new StringBuilder();
161			for(Integer codepoint : codepoints) {
162				builder.appendCodePoint(codepoint);
163			}
164			this.value = builder.toString();
165		}
166
167		abstract boolean isEmoji();
168
169		@Override
170		public String toString() {
171			return value;
172		}
173	}
174
175	public static class Emoji extends Symbol {
176
177		public Emoji(List<Integer> codepoints) {
178			super(codepoints);
179		}
180
181		@Override
182		boolean isEmoji() {
183			return true;
184		}
185	}
186
187	public static class Other extends Symbol {
188
189		public Other(List<Integer> codepoints) {
190			super(codepoints);
191		}
192
193		@Override
194		boolean isEmoji() {
195			return false;
196		}
197	}
198
199	private static class Builder {
200		private final List<Integer> codepoints = new ArrayList<>();
201
202
203		public boolean offer(int codepoint) {
204			boolean add = false;
205			if (this.codepoints.size() == 0) {
206				if (SYMBOLIZE.contains(codepoint)) {
207					add = true;
208				} else if (REGIONAL_INDICATORS.contains(codepoint)) {
209					add = true;
210				} else if (EMOJIS.contains(codepoint) && !FITZPATRICK.contains(codepoint) && codepoint != ZWJ) {
211					add = true;
212				}
213			} else {
214				int previous = codepoints.get(codepoints.size() -1);
215				if (codepoints.get(0) == BLACK_FLAG) {
216					add = TAGS.contains(codepoint);
217				} else if (COMBINING_ENCLOSING_KEYCAP == codepoint) {
218					add = KEYCAP_COMBINEABLE.contains(previous) || previous == VARIATION_16;
219				} else if (SYMBOLIZE.contains(previous)) {
220					add = codepoint == VARIATION_16;
221				} else if (REGIONAL_INDICATORS.contains(previous) && REGIONAL_INDICATORS.contains(codepoint)) {
222					add = codepoints.size() == 1;
223				} else if (previous == VARIATION_16) {
224					add = isMerger(codepoint);
225				} else if (FITZPATRICK.contains(previous)) {
226					add = codepoint == ZWJ;
227				} else if (ZWJ == previous) {
228					add = EMOJIS.contains(codepoint);
229				} else if (isMerger(codepoint)) {
230					add = true;
231				} else if (codepoint == VARIATION_16 && EMOJIS.contains(previous)) {
232					add = true;
233				}
234			}
235			if (add) {
236				codepoints.add(codepoint);
237				return true;
238			} else {
239				return false;
240			}
241		}
242
243		private static boolean isMerger(int codepoint) {
244			return codepoint == ZWJ || FITZPATRICK.contains(codepoint);
245		}
246
247		public Symbol build() {
248			if (codepoints.size() > 0 && SYMBOLIZE.contains(codepoints.get(codepoints.size() - 1))) {
249				return new Other(codepoints);
250			} else if (codepoints.size() > 1 && KEYCAP_COMBINEABLE.contains(codepoints.get(0)) && codepoints.get(codepoints.size() - 1) != COMBINING_ENCLOSING_KEYCAP) {
251				return new Other(codepoints);
252			}
253			return codepoints.size() == 0 ? new Other(codepoints): new Emoji(codepoints);
254		}
255	}
256
257	public static class UnicodeBlocks implements UnicodeSet {
258		final UnicodeSet[] unicodeSets;
259
260		public UnicodeBlocks(UnicodeSet... sets) {
261			this.unicodeSets = sets;
262		}
263
264		@Override
265		public boolean contains(int codepoint) {
266			for(UnicodeSet unicodeSet : unicodeSets) {
267				if (unicodeSet.contains(codepoint)) {
268					return true;
269				}
270			}
271			return false;
272		}
273	}
274
	/**
	 * A set of Unicode code points that can be tested for membership.
	 */
	public interface UnicodeSet {
		/**
		 * @param codepoint the code point to test
		 * @return {@code true} if the code point belongs to this set
		 */
		boolean contains(int codepoint);
	}
278
279	public static class UnicodeList implements UnicodeSet {
280
281		private final List<Integer> list;
282
283		public UnicodeList(Integer... codes) {
284			this.list = Arrays.asList(codes);
285		}
286
287		@Override
288		public boolean contains(int codepoint) {
289			return this.list.contains(codepoint);
290		}
291	}
292
293
294	public static class UnicodeRange implements UnicodeSet {
295
296		private final int lower;
297		private final int upper;
298
299		UnicodeRange(int lower, int upper) {
300			this.lower = lower;
301			this.upper = upper;
302		}
303
304		public boolean contains(int codePoint) {
305			return codePoint >= lower && codePoint <= upper;
306		}
307	}
308}