1/*
2 * Copyright (c) 2017, Daniel Gultsch All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without modification,
5 * are permitted provided that the following conditions are met:
6 *
7 * 1. Redistributions of source code must retain the above copyright notice, this
8 * list of conditions and the following disclaimer.
9 *
10 * 2. Redistributions in binary form must reproduce the above copyright notice,
11 * this list of conditions and the following disclaimer in the documentation and/or
12 * other materials provided with the distribution.
13 *
14 * 3. Neither the name of the copyright holder nor the names of its contributors
15 * may be used to endorse or promote products derived from this software without
16 * specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
22 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30package eu.siacs.conversations.utils;
31
import android.util.LruCache;

import androidx.annotation.NonNull;

import com.google.common.collect.ImmutableSet;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;
44
45public class Emoticons {
46
47 private static final UnicodeRange MISC_SYMBOLS_AND_PICTOGRAPHS =
48 new UnicodeRange(0x1F300, 0x1F5FF);
49 private static final UnicodeRange SUPPLEMENTAL_SYMBOLS = new UnicodeRange(0x1F900, 0x1F9FF);
50 private static final UnicodeRange EMOTICONS = new UnicodeRange(0x1F600, 0x1FAF6);
51 // private static final UnicodeRange TRANSPORT_SYMBOLS = new UnicodeRange(0x1F680, 0x1F6FF);
52 private static final UnicodeRange MISC_SYMBOLS = new UnicodeRange(0x2600, 0x26FF);
53 private static final UnicodeRange DINGBATS = new UnicodeRange(0x2700, 0x27BF);
54 private static final UnicodeRange ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
55 new UnicodeRange(0x1F100, 0x1F1FF);
56 private static final UnicodeRange ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
57 new UnicodeRange(0x1F200, 0x1F2FF);
58 private static final UnicodeRange REGIONAL_INDICATORS = new UnicodeRange(0x1F1E6, 0x1F1FF);
59 private static final UnicodeRange GEOMETRIC_SHAPES = new UnicodeRange(0x25A0, 0x25FF);
60 private static final UnicodeRange LATIN_SUPPLEMENT = new UnicodeRange(0x80, 0xFF);
61 private static final UnicodeRange MISC_TECHNICAL = new UnicodeRange(0x2300, 0x23FF);
62 private static final UnicodeRange TAGS = new UnicodeRange(0xE0020, 0xE007F);
63 private static final UnicodeList CYK_SYMBOLS_AND_PUNCTUATION = new UnicodeList(0x3030, 0x303D);
64 private static final UnicodeList LETTER_LIKE_SYMBOLS = new UnicodeList(0x2122, 0x2139);
65
    // Code points that can carry COMBINING_ENCLOSING_KEY_CAP: '#', '*' and the digits 0-9.
    private static final UnicodeBlocks KEY_CAP_COMBINABLE =
            new UnicodeBlocks(
                    new UnicodeList(0x23), new UnicodeList(0x2A), new UnicodeRange(0x30, 0x39));

    // Code points that are plain text on their own: a sequence that ends in one of these is
    // built as Other, and the only continuation accepted directly after them is VARIATION_16
    // (key cap bases additionally accept COMBINING_ENCLOSING_KEY_CAP).
    private static final UnicodeBlocks SYMBOLIZE =
            new UnicodeBlocks(
                    GEOMETRIC_SHAPES,
                    LATIN_SUPPLEMENT,
                    CYK_SYMBOLS_AND_PUNCTUATION,
                    LETTER_LIKE_SYMBOLS,
                    KEY_CAP_COMBINABLE);
    // Code points that may start an emoji or follow a zero width joiner inside one.
    private static final UnicodeBlocks EMOJIS =
            new UnicodeBlocks(
                    MISC_SYMBOLS_AND_PICTOGRAPHS,
                    SUPPLEMENTAL_SYMBOLS,
                    EMOTICONS,
                    // no separate transport range (0x1F680-0x1F6FF): EMOTICONS already spans it
                    MISC_SYMBOLS,
                    DINGBATS,
                    ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
                    ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
                    MISC_TECHNICAL);
88
    // generatePattern gives up and returns an empty pattern once it has seen this many emoji
    // occurrences in one input.
    private static final int MAX_EMOJIS = 42;

    // Zero width joiner, glues several emoji into one sequence.
    private static final int ZWJ = 0x200D;
    // Variation selector 16.
    private static final int VARIATION_16 = 0xFE0F;
    // Variation selector 15.
    private static final int VARIATION_15 = 0xFE0E;
    // Single-char string forms of the two selectors, used for suffix checks and concatenation.
    private static final String VARIATION_16_STRING = new String(new char[] {VARIATION_16});
    private static final String VARIATION_15_STRING = new String(new char[] {VARIATION_15});
    // Accepted after a KEY_CAP_COMBINABLE code point or after VARIATION_16.
    private static final int COMBINING_ENCLOSING_KEY_CAP = 0x20E3;
    // A sequence starting with this code point is continued with TAGS code points.
    private static final int BLACK_FLAG = 0x1F3F4;
    // Modifier code points: never start an emoji, but may be appended to one (see isMerger).
    private static final UnicodeRange FITZPATRICK = new UnicodeRange(0x1F3FB, 0x1F3FF);
99
    // Emoji (stored without a variation selector) to which normalizeToVS16 appends
    // VARIATION_16 when the input is exactly one of these strings.
    private static final Set<String> TEXT_DEFAULT_TO_VS16 =
            ImmutableSet.of(
                    "❤",
                    "✔",
                    "✖",
                    "➕",
                    "➖",
                    "➗",
                    "⭐",
                    "⚡",
                    "\uD83C\uDF96",
                    "\uD83C\uDFC6",
                    "\uD83E\uDD47",
                    "\uD83E\uDD48",
                    "\uD83E\uDD49",
                    "\uD83D\uDC51",
                    "⚓",
                    "⛵",
                    "✈",
                    "⚖",
                    "⛑",
                    "⚒",
                    "⛏",
                    "☎",
                    "⛄",
                    "⛅",
                    "⚠",
                    "⚛",
                    "✡",
                    "☮",
                    "☯",
                    "☀",
                    "⬅",
                    "➡",
                    "⬆",
                    "⬇");
136
    // Compiled patterns from generatePattern, keyed by the input they were generated for.
    // Created with a maximum size of 256.
    private static final LruCache<CharSequence, Pattern> CACHE = new LruCache<>(256);
138
139 public static String normalizeToVS16(final String input) {
140 return TEXT_DEFAULT_TO_VS16.contains(input) && !input.endsWith(VARIATION_15_STRING)
141 ? input + VARIATION_16_STRING
142 : input;
143 }
144
145 public static String existingVariant(final String original, final Set<String> existing) {
146 if (existing.contains(original) || original.endsWith(VARIATION_15_STRING)) {
147 return original;
148 }
149 final var variant =
150 original.endsWith(VARIATION_16_STRING)
151 ? original.substring(0, original.length() - 1)
152 : original + VARIATION_16_STRING;
153 return existing.contains(variant) ? variant : original;
154 }
155
156 private static List<Symbol> parse(String input) {
157 List<Symbol> symbols = new ArrayList<>();
158 Builder builder = new Builder();
159 boolean needsFinalBuild = false;
160 for (int cp, i = 0; i < input.length(); i += Character.charCount(cp)) {
161 cp = input.codePointAt(i);
162 if (builder.offer(cp)) {
163 needsFinalBuild = true;
164 } else {
165 symbols.add(builder.build());
166 builder = new Builder();
167 if (builder.offer(cp)) {
168 needsFinalBuild = true;
169 }
170 }
171 }
172 if (needsFinalBuild) {
173 symbols.add(builder.build());
174 }
175 return symbols;
176 }
177
178 public static Pattern getEmojiPattern(final CharSequence input) {
179 Pattern pattern = CACHE.get(input);
180 if (pattern == null) {
181 pattern = generatePattern(input);
182 CACHE.put(input, pattern);
183 }
184 return pattern;
185 }
186
187 private static Pattern generatePattern(CharSequence input) {
188 final HashSet<String> emojis = new HashSet<>();
189 int i = 0;
190 for (final Symbol symbol : parse(input.toString())) {
191 if (symbol instanceof Emoji) {
192 emojis.add(symbol.toString());
193 if (++i >= MAX_EMOJIS) {
194 return Pattern.compile("");
195 }
196 }
197 }
198 final StringBuilder pattern = new StringBuilder();
199 for (String emoji : emojis) {
200 if (pattern.length() != 0) {
201 pattern.append('|');
202 }
203 pattern.append(Pattern.quote(emoji));
204 }
205 return Pattern.compile(pattern.toString());
206 }
207
208 public static boolean isEmoji(String input) {
209 List<Symbol> symbols = parse(input);
210 return symbols.size() == 1 && symbols.get(0).isEmoji();
211 }
212
213 public static boolean isOnlyEmoji(String input) {
214 List<Symbol> symbols = parse(input);
215 for (Symbol symbol : symbols) {
216 if (!symbol.isEmoji()) {
217 return false;
218 }
219 }
220 return !symbols.isEmpty();
221 }
222
223 private abstract static class Symbol {
224
225 private final String value;
226
227 Symbol(List<Integer> codepoints) {
228 final StringBuilder builder = new StringBuilder();
229 for (final Integer codepoint : codepoints) {
230 builder.appendCodePoint(codepoint);
231 }
232 this.value = builder.toString();
233 }
234
235 abstract boolean isEmoji();
236
237 @NonNull
238 @Override
239 public String toString() {
240 return value;
241 }
242 }
243
244 public static class Emoji extends Symbol {
245
246 Emoji(List<Integer> codepoints) {
247 super(codepoints);
248 }
249
250 @Override
251 boolean isEmoji() {
252 return true;
253 }
254 }
255
256 public static class Other extends Symbol {
257
258 public Other(List<Integer> codepoints) {
259 super(codepoints);
260 }
261
262 @Override
263 boolean isEmoji() {
264 return false;
265 }
266 }
267
268 private static class Builder {
269 private final List<Integer> codepoints = new ArrayList<>();
270
271 public boolean offer(int codepoint) {
272 boolean add = false;
273 if (this.codepoints.isEmpty()) {
274 if (SYMBOLIZE.contains(codepoint)) {
275 add = true;
276 } else if (REGIONAL_INDICATORS.contains(codepoint)) {
277 add = true;
278 } else if (EMOJIS.contains(codepoint)
279 && !FITZPATRICK.contains(codepoint)
280 && codepoint != ZWJ) {
281 add = true;
282 }
283 } else {
284 int previous = codepoints.get(codepoints.size() - 1);
285 if (codepoints.get(0) == BLACK_FLAG) {
286 add = TAGS.contains(codepoint);
287 } else if (COMBINING_ENCLOSING_KEY_CAP == codepoint) {
288 add = KEY_CAP_COMBINABLE.contains(previous) || previous == VARIATION_16;
289 } else if (SYMBOLIZE.contains(previous)) {
290 add = codepoint == VARIATION_16;
291 } else if (REGIONAL_INDICATORS.contains(previous)
292 && REGIONAL_INDICATORS.contains(codepoint)) {
293 add = codepoints.size() == 1;
294 } else if (previous == VARIATION_16) {
295 add = isMerger(codepoint) || codepoint == VARIATION_16;
296 } else if (FITZPATRICK.contains(previous)) {
297 add = codepoint == ZWJ;
298 } else if (ZWJ == previous) {
299 add = EMOJIS.contains(codepoint);
300 } else if (isMerger(codepoint)) {
301 add = true;
302 } else if (codepoint == VARIATION_16 && EMOJIS.contains(previous)) {
303 add = true;
304 }
305 }
306 if (add) {
307 codepoints.add(codepoint);
308 return true;
309 } else {
310 return false;
311 }
312 }
313
314 private static boolean isMerger(int codepoint) {
315 return codepoint == ZWJ || FITZPATRICK.contains(codepoint);
316 }
317
318 public Symbol build() {
319 if (!codepoints.isEmpty()
320 && SYMBOLIZE.contains(codepoints.get(codepoints.size() - 1))) {
321 return new Other(codepoints);
322 } else if (codepoints.size() > 1
323 && KEY_CAP_COMBINABLE.contains(codepoints.get(0))
324 && codepoints.get(codepoints.size() - 1) != COMBINING_ENCLOSING_KEY_CAP) {
325 return new Other(codepoints);
326 }
327 return codepoints.isEmpty() ? new Other(codepoints) : new Emoji(codepoints);
328 }
329 }
330
331 public static class UnicodeBlocks implements UnicodeSet {
332 final UnicodeSet[] unicodeSets;
333
334 UnicodeBlocks(final UnicodeSet... sets) {
335 this.unicodeSets = sets;
336 }
337
338 @Override
339 public boolean contains(int codepoint) {
340 for (UnicodeSet unicodeSet : unicodeSets) {
341 if (unicodeSet.contains(codepoint)) {
342 return true;
343 }
344 }
345 return false;
346 }
347 }
348
    /** A set of Unicode code points that can be tested for membership. */
    public interface UnicodeSet {
        /**
         * @param codepoint the code point to test
         * @return true if the code point belongs to this set
         */
        boolean contains(int codepoint);
    }
352
353 public static class UnicodeList implements UnicodeSet {
354
355 private final List<Integer> list;
356
357 UnicodeList(final Integer... codes) {
358 this.list = Arrays.asList(codes);
359 }
360
361 @Override
362 public boolean contains(int codepoint) {
363 return this.list.contains(codepoint);
364 }
365 }
366
367 public static class UnicodeRange implements UnicodeSet {
368
369 private final int lower;
370 private final int upper;
371
372 UnicodeRange(int lower, int upper) {
373 this.lower = lower;
374 this.upper = upper;
375 }
376
377 public boolean contains(int codePoint) {
378 return codePoint >= lower && codePoint <= upper;
379 }
380 }
381}