1/*
2 * Copyright (c) 2017, Daniel Gultsch All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without modification,
5 * are permitted provided that the following conditions are met:
6 *
7 * 1. Redistributions of source code must retain the above copyright notice, this
8 * list of conditions and the following disclaimer.
9 *
10 * 2. Redistributions in binary form must reproduce the above copyright notice,
11 * this list of conditions and the following disclaimer in the documentation and/or
12 * other materials provided with the distribution.
13 *
14 * 3. Neither the name of the copyright holder nor the names of its contributors
15 * may be used to endorse or promote products derived from this software without
16 * specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
22 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30package eu.siacs.conversations.utils;
31
import android.support.annotation.NonNull;
import android.util.LruCache;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.regex.Pattern;
40
41public class Emoticons {
42
43 private static final UnicodeRange MISC_SYMBOLS_AND_PICTOGRAPHS = new UnicodeRange(0x1F300, 0x1F5FF);
44 private static final UnicodeRange SUPPLEMENTAL_SYMBOLS = new UnicodeRange(0x1F900, 0x1F9FF);
45 private static final UnicodeRange EMOTICONS = new UnicodeRange(0x1F600, 0x1F64F);
46 private static final UnicodeRange TRANSPORT_SYMBOLS = new UnicodeRange(0x1F680, 0x1F6FF);
47 private static final UnicodeRange MISC_SYMBOLS = new UnicodeRange(0x2600, 0x26FF);
48 private static final UnicodeRange DINGBATS = new UnicodeRange(0x2700, 0x27BF);
49 private static final UnicodeRange ENCLOSED_ALPHANUMERIC_SUPPLEMENT = new UnicodeRange(0x1F100, 0x1F1FF);
50 private static final UnicodeRange ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = new UnicodeRange(0x1F200, 0x1F2FF);
51 private static final UnicodeRange REGIONAL_INDICATORS = new UnicodeRange(0x1F1E6, 0x1F1FF);
52 private static final UnicodeRange GEOMETRIC_SHAPES = new UnicodeRange(0x25A0, 0x25FF);
53 private static final UnicodeRange LATIN_SUPPLEMENT = new UnicodeRange(0x80, 0xFF);
54 private static final UnicodeRange MISC_TECHNICAL = new UnicodeRange(0x2300, 0x23FF);
55 private static final UnicodeRange TAGS = new UnicodeRange(0xE0020, 0xE007F);
56 private static final UnicodeList CYK_SYMBOLS_AND_PUNCTUATION = new UnicodeList(0x3030, 0x303D);
57 private static final UnicodeList LETTERLIKE_SYMBOLS = new UnicodeList(0x2122, 0x2139);
58
59 private static final UnicodeBlocks KEYCAP_COMBINEABLE = new UnicodeBlocks(new UnicodeList(0x23), new UnicodeList(0x2A), new UnicodeRange(0x30, 0x39));
60
61 private static final UnicodeBlocks SYMBOLIZE = new UnicodeBlocks(
62 GEOMETRIC_SHAPES,
63 LATIN_SUPPLEMENT,
64 CYK_SYMBOLS_AND_PUNCTUATION,
65 LETTERLIKE_SYMBOLS,
66 KEYCAP_COMBINEABLE);
67 private static final UnicodeBlocks EMOJIS = new UnicodeBlocks(
68 MISC_SYMBOLS_AND_PICTOGRAPHS,
69 SUPPLEMENTAL_SYMBOLS,
70 EMOTICONS,
71 TRANSPORT_SYMBOLS,
72 MISC_SYMBOLS,
73 DINGBATS,
74 ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
75 ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
76 MISC_TECHNICAL);
77
78 private static final int MAX_EMOIJS = 42;
79
80 private static final int ZWJ = 0x200D;
81 private static final int VARIATION_16 = 0xFE0F;
82 private static final int COMBINING_ENCLOSING_KEYCAP = 0x20E3;
83 private static final int BLACK_FLAG = 0x1F3F4;
84 private static final UnicodeRange FITZPATRICK = new UnicodeRange(0x1F3FB, 0x1F3FF);
85
86 private static final LruCache<CharSequence, Pattern> CACHE = new LruCache<>(256);
87
88 private static List<Symbol> parse(String input) {
89 List<Symbol> symbols = new ArrayList<>();
90 Builder builder = new Builder();
91 boolean needsFinalBuild = false;
92 for (int cp, i = 0; i < input.length(); i += Character.charCount(cp)) {
93 cp = input.codePointAt(i);
94 if (builder.offer(cp)) {
95 needsFinalBuild = true;
96 } else {
97 symbols.add(builder.build());
98 builder = new Builder();
99 if (builder.offer(cp)) {
100 needsFinalBuild = true;
101 }
102 }
103 }
104 if (needsFinalBuild) {
105 symbols.add(builder.build());
106 }
107 return symbols;
108 }
109
110 public static Pattern getEmojiPattern(final CharSequence input) {
111 Pattern pattern = CACHE.get(input);
112 if (pattern == null) {
113 pattern = generatePattern(input);
114 CACHE.put(input, pattern);
115 }
116 return pattern;
117 }
118
119 private static Pattern generatePattern(CharSequence input) {
120 final HashSet<String> emojis = new HashSet<>();
121 int i = 0;
122 for (final Symbol symbol : parse(input.toString())) {
123 if (symbol instanceof Emoji) {
124 emojis.add(symbol.toString());
125 if (++i >= MAX_EMOIJS) {
126 return Pattern.compile("");
127 }
128 }
129 }
130 final StringBuilder pattern = new StringBuilder();
131 for (String emoji : emojis) {
132 if (pattern.length() != 0) {
133 pattern.append('|');
134 }
135 pattern.append(Pattern.quote(emoji));
136 }
137 return Pattern.compile(pattern.toString());
138 }
139
140 public static boolean isEmoji(String input) {
141 List<Symbol> symbols = parse(input);
142 return symbols.size() == 1 && symbols.get(0).isEmoji();
143 }
144
145 public static boolean isOnlyEmoji(String input) {
146 List<Symbol> symbols = parse(input);
147 for (Symbol symbol : symbols) {
148 if (!symbol.isEmoji()) {
149 return false;
150 }
151 }
152 return symbols.size() > 0;
153 }
154
155 private static abstract class Symbol {
156
157 private final String value;
158
159 Symbol(List<Integer> codepoints) {
160 final StringBuilder builder = new StringBuilder();
161 for (final Integer codepoint : codepoints) {
162 builder.appendCodePoint(codepoint);
163 }
164 this.value = builder.toString();
165 }
166
167 abstract boolean isEmoji();
168
169 @NonNull
170 @Override
171 public String toString() {
172 return value;
173 }
174 }
175
176 public static class Emoji extends Symbol {
177
178 Emoji(List<Integer> codepoints) {
179 super(codepoints);
180 }
181
182 @Override
183 boolean isEmoji() {
184 return true;
185 }
186 }
187
188 public static class Other extends Symbol {
189
190 public Other(List<Integer> codepoints) {
191 super(codepoints);
192 }
193
194 @Override
195 boolean isEmoji() {
196 return false;
197 }
198 }
199
200 private static class Builder {
201 private final List<Integer> codepoints = new ArrayList<>();
202
203
204 public boolean offer(int codepoint) {
205 boolean add = false;
206 if (this.codepoints.size() == 0) {
207 if (SYMBOLIZE.contains(codepoint)) {
208 add = true;
209 } else if (REGIONAL_INDICATORS.contains(codepoint)) {
210 add = true;
211 } else if (EMOJIS.contains(codepoint) && !FITZPATRICK.contains(codepoint) && codepoint != ZWJ) {
212 add = true;
213 }
214 } else {
215 int previous = codepoints.get(codepoints.size() - 1);
216 if (codepoints.get(0) == BLACK_FLAG) {
217 add = TAGS.contains(codepoint);
218 } else if (COMBINING_ENCLOSING_KEYCAP == codepoint) {
219 add = KEYCAP_COMBINEABLE.contains(previous) || previous == VARIATION_16;
220 } else if (SYMBOLIZE.contains(previous)) {
221 add = codepoint == VARIATION_16;
222 } else if (REGIONAL_INDICATORS.contains(previous) && REGIONAL_INDICATORS.contains(codepoint)) {
223 add = codepoints.size() == 1;
224 } else if (previous == VARIATION_16) {
225 add = isMerger(codepoint) || codepoint == VARIATION_16;
226 } else if (FITZPATRICK.contains(previous)) {
227 add = codepoint == ZWJ;
228 } else if (ZWJ == previous) {
229 add = EMOJIS.contains(codepoint);
230 } else if (isMerger(codepoint)) {
231 add = true;
232 } else if (codepoint == VARIATION_16 && EMOJIS.contains(previous)) {
233 add = true;
234 }
235 }
236 if (add) {
237 codepoints.add(codepoint);
238 return true;
239 } else {
240 return false;
241 }
242 }
243
244 private static boolean isMerger(int codepoint) {
245 return codepoint == ZWJ || FITZPATRICK.contains(codepoint);
246 }
247
248 public Symbol build() {
249 if (codepoints.size() > 0 && SYMBOLIZE.contains(codepoints.get(codepoints.size() - 1))) {
250 return new Other(codepoints);
251 } else if (codepoints.size() > 1 && KEYCAP_COMBINEABLE.contains(codepoints.get(0)) && codepoints.get(codepoints.size() - 1) != COMBINING_ENCLOSING_KEYCAP) {
252 return new Other(codepoints);
253 }
254 return codepoints.size() == 0 ? new Other(codepoints) : new Emoji(codepoints);
255 }
256 }
257
258 public static class UnicodeBlocks implements UnicodeSet {
259 final UnicodeSet[] unicodeSets;
260
261 UnicodeBlocks(final UnicodeSet... sets) {
262 this.unicodeSets = sets;
263 }
264
265 @Override
266 public boolean contains(int codepoint) {
267 for (UnicodeSet unicodeSet : unicodeSets) {
268 if (unicodeSet.contains(codepoint)) {
269 return true;
270 }
271 }
272 return false;
273 }
274 }
275
276 public interface UnicodeSet {
277 boolean contains(int codepoint);
278 }
279
280 public static class UnicodeList implements UnicodeSet {
281
282 private final List<Integer> list;
283
284 UnicodeList(final Integer... codes) {
285 this.list = Arrays.asList(codes);
286 }
287
288 @Override
289 public boolean contains(int codepoint) {
290 return this.list.contains(codepoint);
291 }
292 }
293
294
295 public static class UnicodeRange implements UnicodeSet {
296
297 private final int lower;
298 private final int upper;
299
300 UnicodeRange(int lower, int upper) {
301 this.lower = lower;
302 this.upper = upper;
303 }
304
305 public boolean contains(int codePoint) {
306 return codePoint >= lower && codePoint <= upper;
307 }
308 }
309}