1/*
2 * Copyright (c) 2017, Daniel Gultsch All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without modification,
5 * are permitted provided that the following conditions are met:
6 *
7 * 1. Redistributions of source code must retain the above copyright notice, this
8 * list of conditions and the following disclaimer.
9 *
10 * 2. Redistributions in binary form must reproduce the above copyright notice,
11 * this list of conditions and the following disclaimer in the documentation and/or
12 * other materials provided with the distribution.
13 *
14 * 3. Neither the name of the copyright holder nor the names of its contributors
15 * may be used to endorse or promote products derived from this software without
16 * specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
22 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30package eu.siacs.conversations.utils;
31
import android.util.LruCache;

import androidx.annotation.NonNull;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.regex.Pattern;
41
42public class Emoticons {
43
44 private static final UnicodeRange MISC_SYMBOLS_AND_PICTOGRAPHS = new UnicodeRange(0x1F300, 0x1F5FF);
45 private static final UnicodeRange SUPPLEMENTAL_SYMBOLS = new UnicodeRange(0x1F900, 0x1F9FF);
46 private static final UnicodeRange EMOTICONS = new UnicodeRange(0x1F600, 0x1FAF6);
47 //private static final UnicodeRange TRANSPORT_SYMBOLS = new UnicodeRange(0x1F680, 0x1F6FF);
48 private static final UnicodeRange MISC_SYMBOLS = new UnicodeRange(0x2600, 0x26FF);
49 private static final UnicodeRange DINGBATS = new UnicodeRange(0x2700, 0x27BF);
50 private static final UnicodeRange ENCLOSED_ALPHANUMERIC_SUPPLEMENT = new UnicodeRange(0x1F100, 0x1F1FF);
51 private static final UnicodeRange ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = new UnicodeRange(0x1F200, 0x1F2FF);
52 private static final UnicodeRange REGIONAL_INDICATORS = new UnicodeRange(0x1F1E6, 0x1F1FF);
53 private static final UnicodeRange GEOMETRIC_SHAPES = new UnicodeRange(0x25A0, 0x25FF);
54 private static final UnicodeRange LATIN_SUPPLEMENT = new UnicodeRange(0x80, 0xFF);
55 private static final UnicodeRange MISC_TECHNICAL = new UnicodeRange(0x2300, 0x23FF);
56 private static final UnicodeRange TAGS = new UnicodeRange(0xE0020, 0xE007F);
57 private static final UnicodeList CYK_SYMBOLS_AND_PUNCTUATION = new UnicodeList(0x3030, 0x303D);
58 private static final UnicodeList LETTERLIKE_SYMBOLS = new UnicodeList(0x2122, 0x2139);
59
60 private static final UnicodeBlocks KEYCAP_COMBINEABLE = new UnicodeBlocks(new UnicodeList(0x23), new UnicodeList(0x2A), new UnicodeRange(0x30, 0x39));
61
62 private static final UnicodeBlocks SYMBOLIZE = new UnicodeBlocks(
63 GEOMETRIC_SHAPES,
64 LATIN_SUPPLEMENT,
65 CYK_SYMBOLS_AND_PUNCTUATION,
66 LETTERLIKE_SYMBOLS,
67 KEYCAP_COMBINEABLE);
68 private static final UnicodeBlocks EMOJIS = new UnicodeBlocks(
69 MISC_SYMBOLS_AND_PICTOGRAPHS,
70 SUPPLEMENTAL_SYMBOLS,
71 EMOTICONS,
72 //TRANSPORT_SYMBOLS,
73 MISC_SYMBOLS,
74 DINGBATS,
75 ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
76 ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
77 MISC_TECHNICAL);
78
79 private static final int MAX_EMOIJS = 42;
80
81 private static final int ZWJ = 0x200D;
82 private static final int VARIATION_16 = 0xFE0F;
83 private static final int COMBINING_ENCLOSING_KEYCAP = 0x20E3;
84 private static final int BLACK_FLAG = 0x1F3F4;
85 private static final UnicodeRange FITZPATRICK = new UnicodeRange(0x1F3FB, 0x1F3FF);
86
87 private static final LruCache<CharSequence, Pattern> CACHE = new LruCache<>(256);
88
89 private static List<Symbol> parse(String input) {
90 List<Symbol> symbols = new ArrayList<>();
91 Builder builder = new Builder();
92 boolean needsFinalBuild = false;
93 for (int cp, i = 0; i < input.length(); i += Character.charCount(cp)) {
94 cp = input.codePointAt(i);
95 if (builder.offer(cp)) {
96 needsFinalBuild = true;
97 } else {
98 symbols.add(builder.build());
99 builder = new Builder();
100 if (builder.offer(cp)) {
101 needsFinalBuild = true;
102 }
103 }
104 }
105 if (needsFinalBuild) {
106 symbols.add(builder.build());
107 }
108 return symbols;
109 }
110
111 public static Pattern getEmojiPattern(final CharSequence input) {
112 Pattern pattern = CACHE.get(input);
113 if (pattern == null) {
114 pattern = generatePattern(input);
115 CACHE.put(input, pattern);
116 }
117 return pattern;
118 }
119
120 private static Pattern generatePattern(CharSequence input) {
121 final HashSet<String> emojis = new HashSet<>();
122 int i = 0;
123 for (final Symbol symbol : parse(input.toString())) {
124 if (symbol instanceof Emoji) {
125 emojis.add(symbol.toString());
126 if (++i >= MAX_EMOIJS) {
127 return Pattern.compile("");
128 }
129 }
130 }
131 final StringBuilder pattern = new StringBuilder();
132 for (String emoji : emojis) {
133 if (pattern.length() != 0) {
134 pattern.append('|');
135 }
136 pattern.append(Pattern.quote(emoji));
137 }
138 return Pattern.compile(pattern.toString());
139 }
140
141 public static boolean isEmoji(String input) {
142 List<Symbol> symbols = parse(input);
143 return symbols.size() == 1 && symbols.get(0).isEmoji();
144 }
145
146 public static boolean isOnlyEmoji(String input) {
147 List<Symbol> symbols = parse(input);
148 for (Symbol symbol : symbols) {
149 if (!symbol.isEmoji()) {
150 return false;
151 }
152 }
153 return symbols.size() > 0;
154 }
155
156 private static abstract class Symbol {
157
158 private final String value;
159
160 Symbol(List<Integer> codepoints) {
161 final StringBuilder builder = new StringBuilder();
162 for (final Integer codepoint : codepoints) {
163 builder.appendCodePoint(codepoint);
164 }
165 this.value = builder.toString();
166 }
167
168 abstract boolean isEmoji();
169
170 @NonNull
171 @Override
172 public String toString() {
173 return value;
174 }
175 }
176
177 public static class Emoji extends Symbol {
178
179 Emoji(List<Integer> codepoints) {
180 super(codepoints);
181 }
182
183 @Override
184 boolean isEmoji() {
185 return true;
186 }
187 }
188
189 public static class Other extends Symbol {
190
191 public Other(List<Integer> codepoints) {
192 super(codepoints);
193 }
194
195 @Override
196 boolean isEmoji() {
197 return false;
198 }
199 }
200
201 private static class Builder {
202 private final List<Integer> codepoints = new ArrayList<>();
203
204
205 public boolean offer(int codepoint) {
206 boolean add = false;
207 if (this.codepoints.size() == 0) {
208 if (SYMBOLIZE.contains(codepoint)) {
209 add = true;
210 } else if (REGIONAL_INDICATORS.contains(codepoint)) {
211 add = true;
212 } else if (EMOJIS.contains(codepoint) && !FITZPATRICK.contains(codepoint) && codepoint != ZWJ) {
213 add = true;
214 }
215 } else {
216 int previous = codepoints.get(codepoints.size() - 1);
217 if (codepoints.get(0) == BLACK_FLAG) {
218 add = TAGS.contains(codepoint);
219 } else if (COMBINING_ENCLOSING_KEYCAP == codepoint) {
220 add = KEYCAP_COMBINEABLE.contains(previous) || previous == VARIATION_16;
221 } else if (SYMBOLIZE.contains(previous)) {
222 add = codepoint == VARIATION_16;
223 } else if (REGIONAL_INDICATORS.contains(previous) && REGIONAL_INDICATORS.contains(codepoint)) {
224 add = codepoints.size() == 1;
225 } else if (previous == VARIATION_16) {
226 add = isMerger(codepoint) || codepoint == VARIATION_16;
227 } else if (FITZPATRICK.contains(previous)) {
228 add = codepoint == ZWJ;
229 } else if (ZWJ == previous) {
230 add = EMOJIS.contains(codepoint);
231 } else if (isMerger(codepoint)) {
232 add = true;
233 } else if (codepoint == VARIATION_16 && EMOJIS.contains(previous)) {
234 add = true;
235 }
236 }
237 if (add) {
238 codepoints.add(codepoint);
239 return true;
240 } else {
241 return false;
242 }
243 }
244
245 private static boolean isMerger(int codepoint) {
246 return codepoint == ZWJ || FITZPATRICK.contains(codepoint);
247 }
248
249 public Symbol build() {
250 if (codepoints.size() > 0 && SYMBOLIZE.contains(codepoints.get(codepoints.size() - 1))) {
251 return new Other(codepoints);
252 } else if (codepoints.size() > 1 && KEYCAP_COMBINEABLE.contains(codepoints.get(0)) && codepoints.get(codepoints.size() - 1) != COMBINING_ENCLOSING_KEYCAP) {
253 return new Other(codepoints);
254 }
255 return codepoints.size() == 0 ? new Other(codepoints) : new Emoji(codepoints);
256 }
257 }
258
259 public static class UnicodeBlocks implements UnicodeSet {
260 final UnicodeSet[] unicodeSets;
261
262 UnicodeBlocks(final UnicodeSet... sets) {
263 this.unicodeSets = sets;
264 }
265
266 @Override
267 public boolean contains(int codepoint) {
268 for (UnicodeSet unicodeSet : unicodeSets) {
269 if (unicodeSet.contains(codepoint)) {
270 return true;
271 }
272 }
273 return false;
274 }
275 }
276
277 public interface UnicodeSet {
278 boolean contains(int codepoint);
279 }
280
281 public static class UnicodeList implements UnicodeSet {
282
283 private final List<Integer> list;
284
285 UnicodeList(final Integer... codes) {
286 this.list = Arrays.asList(codes);
287 }
288
289 @Override
290 public boolean contains(int codepoint) {
291 return this.list.contains(codepoint);
292 }
293 }
294
295
296 public static class UnicodeRange implements UnicodeSet {
297
298 private final int lower;
299 private final int upper;
300
301 UnicodeRange(int lower, int upper) {
302 this.lower = lower;
303 this.upper = upper;
304 }
305
306 public boolean contains(int codePoint) {
307 return codePoint >= lower && codePoint <= upper;
308 }
309 }
310}