1/*
2 * Copyright (c) 2017, Daniel Gultsch All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without modification,
5 * are permitted provided that the following conditions are met:
6 *
7 * 1. Redistributions of source code must retain the above copyright notice, this
8 * list of conditions and the following disclaimer.
9 *
10 * 2. Redistributions in binary form must reproduce the above copyright notice,
11 * this list of conditions and the following disclaimer in the documentation and/or
12 * other materials provided with the distribution.
13 *
14 * 3. Neither the name of the copyright holder nor the names of its contributors
15 * may be used to endorse or promote products derived from this software without
16 * specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
22 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30package eu.siacs.conversations.utils;
31
import android.util.LruCache;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.regex.Pattern;
40
41public class Emoticons {
42
// Unicode code point ranges/lists used by Builder to classify input.
// Declaration order matters: the composite sets below reference earlier fields.
private static final UnicodeRange MISC_SYMBOLS_AND_PICTOGRAPHS = new UnicodeRange(0x1F300,0x1F5FF);
private static final UnicodeRange SUPPLEMENTAL_SYMBOLS = new UnicodeRange(0x1F900,0x1F9FF);
private static final UnicodeRange EMOTICONS = new UnicodeRange(0x1F600,0x1F64F);
private static final UnicodeRange TRANSPORT_SYMBOLS = new UnicodeRange(0x1F680,0x1F6FF);
private static final UnicodeRange MISC_SYMBOLS = new UnicodeRange(0x2600,0x26FF);
private static final UnicodeRange DINGBATS = new UnicodeRange(0x2700,0x27BF);
private static final UnicodeRange ENCLOSED_ALPHANUMERIC_SUPPLEMENT = new UnicodeRange(0x1F100,0x1F1FF);
private static final UnicodeRange ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = new UnicodeRange(0x1F200,0x1F2FF);
// Lies inside ENCLOSED_ALPHANUMERIC_SUPPLEMENT; Builder.offer lets exactly two of these pair up.
private static final UnicodeRange REGIONAL_INDICATORS = new UnicodeRange(0x1F1E6,0x1F1FF);
private static final UnicodeRange GEOMETRIC_SHAPES = new UnicodeRange(0x25A0,0x25FF);
private static final UnicodeRange LATIN_SUPPLEMENT = new UnicodeRange(0x80,0xFF);
private static final UnicodeRange MISC_TECHNICAL = new UnicodeRange(0x2300,0x23FF);
// Tag characters; Builder.offer accepts them only in a symbol that starts with BLACK_FLAG.
private static final UnicodeRange TAGS = new UnicodeRange(0xE0020,0xE007F);
// Two individual code points (U+3030, U+303D).
// NOTE(review): "CYK" is presumably a typo for "CJK" — the identifier is only used in SYMBOLIZE below.
private static final UnicodeList CYK_SYMBOLS_AND_PUNCTUATION = new UnicodeList(0x3030,0x303D);
// Two individual code points (U+2122, U+2139).
private static final UnicodeList LETTERLIKE_SYMBOLS = new UnicodeList(0x2122,0x2139);

// '#', '*' and the digits '0'-'9': the code points COMBINING_ENCLOSING_KEYCAP may attach to.
private static final UnicodeBlocks KEYCAP_COMBINEABLE = new UnicodeBlocks(new UnicodeList(0x23),new UnicodeList(0x2A),new UnicodeRange(0x30,0x39));

// Code points that may start a symbol but are not emoji by themselves:
// Builder.build() returns Other when a symbol ends on one of them, and
// Builder.offer only lets VARIATION_16 (or the enclosing keycap) follow them.
private static final UnicodeBlocks SYMBOLIZE = new UnicodeBlocks(
        GEOMETRIC_SHAPES,
        LATIN_SUPPLEMENT,
        CYK_SYMBOLS_AND_PUNCTUATION,
        LETTERLIKE_SYMBOLS,
        KEYCAP_COMBINEABLE);
// Code points that may start an emoji (skin tone modifiers excepted, see Builder.offer)
// or continue one after a ZWJ.
private static final UnicodeBlocks EMOJIS = new UnicodeBlocks(
        MISC_SYMBOLS_AND_PICTOGRAPHS,
        SUPPLEMENTAL_SYMBOLS,
        EMOTICONS,
        TRANSPORT_SYMBOLS,
        MISC_SYMBOLS,
        DINGBATS,
        ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
        ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
        MISC_TECHNICAL);

// generatePattern gives up and returns an empty pattern once it has seen this many
// emoji occurrences. (Spelling of the identifier kept: it is referenced elsewhere.)
private static final int MAX_EMOIJS = 42;

// U+200D ZERO WIDTH JOINER
private static final int ZWJ = 0x200D;
// U+FE0F VARIATION SELECTOR-16
private static final int VARIATION_16 = 0xFE0F;
// U+20E3 COMBINING ENCLOSING KEYCAP
private static final int COMBINING_ENCLOSING_KEYCAP = 0x20E3;
// U+1F3F4; a symbol starting with this code point is handled by the TAGS rule in Builder.offer.
private static final int BLACK_FLAG = 0x1F3F4;
// U+1F3FB..U+1F3FF (skin tone modifiers); never allowed as the first code point of a symbol.
private static final UnicodeRange FITZPATRICK = new UnicodeRange(0x1F3FB,0x1F3FF);

// Compiled patterns per input text, used by getEmojiPattern; constructed with a max size of 256.
private static final LruCache<CharSequence,Pattern> CACHE = new LruCache<>(256);
87
88 private static List<Symbol> parse(String input) {
89 List<Symbol> symbols = new ArrayList<>();
90 Builder builder = new Builder();
91 boolean needsFinalBuild = false;
92 for (int cp, i = 0; i < input.length(); i += Character.charCount(cp)) {
93 cp = input.codePointAt(i);
94 if (builder.offer(cp)) {
95 needsFinalBuild = true;
96 } else {
97 symbols.add(builder.build());
98 builder = new Builder();
99 if (builder.offer(cp)) {
100 needsFinalBuild = true;
101 }
102 }
103 }
104 if (needsFinalBuild) {
105 symbols.add(builder.build());
106 }
107 return symbols;
108 }
109
110 public static Pattern getEmojiPattern(CharSequence input) {
111 Pattern pattern = CACHE.get(input);
112 if (pattern == null) {
113 pattern = generatePattern(input);
114 CACHE.put(input, pattern);
115 }
116 return pattern;
117 }
118
119 private static Pattern generatePattern(CharSequence input) {
120 final HashSet<String> emojis = new HashSet<>();
121 int i = 0;
122 for(Symbol symbol : parse(input.toString())) {
123 if (symbol instanceof Emoji) {
124 emojis.add(symbol.toString());
125 if (++i >= MAX_EMOIJS) {
126 return Pattern.compile("");
127 }
128 }
129 }
130 final StringBuilder pattern = new StringBuilder();
131 for(String emoji : emojis) {
132 if (pattern.length() != 0) {
133 pattern.append('|');
134 }
135 pattern.append(Pattern.quote(emoji));
136 }
137 return Pattern.compile(pattern.toString());
138 }
139
140 public static boolean isEmoji(String input) {
141 List<Symbol> symbols = parse(input);
142 return symbols.size() == 1 && symbols.get(0).isEmoji();
143 }
144
145 public static boolean isOnlyEmoji(String input) {
146 List<Symbol> symbols = parse(input);
147 for(Symbol symbol : symbols) {
148 if (!symbol.isEmoji()) {
149 return false;
150 }
151 }
152 return symbols.size() > 0;
153 }
154
155 private static abstract class Symbol {
156
157 private final String value;
158
159 public Symbol(List<Integer> codepoints) {
160 StringBuilder builder = new StringBuilder();
161 for(Integer codepoint : codepoints) {
162 builder.appendCodePoint(codepoint);
163 }
164 this.value = builder.toString();
165 }
166
167 abstract boolean isEmoji();
168
169 @Override
170 public String toString() {
171 return value;
172 }
173 }
174
175 public static class Emoji extends Symbol {
176
177 public Emoji(List<Integer> codepoints) {
178 super(codepoints);
179 }
180
181 @Override
182 boolean isEmoji() {
183 return true;
184 }
185 }
186
187 public static class Other extends Symbol {
188
189 public Other(List<Integer> codepoints) {
190 super(codepoints);
191 }
192
193 @Override
194 boolean isEmoji() {
195 return false;
196 }
197 }
198
199 private static class Builder {
200 private final List<Integer> codepoints = new ArrayList<>();
201
202
203 public boolean offer(int codepoint) {
204 boolean add = false;
205 if (this.codepoints.size() == 0) {
206 if (SYMBOLIZE.contains(codepoint)) {
207 add = true;
208 } else if (REGIONAL_INDICATORS.contains(codepoint)) {
209 add = true;
210 } else if (EMOJIS.contains(codepoint) && !FITZPATRICK.contains(codepoint) && codepoint != ZWJ) {
211 add = true;
212 }
213 } else {
214 int previous = codepoints.get(codepoints.size() -1);
215 if (codepoints.get(0) == BLACK_FLAG) {
216 add = TAGS.contains(codepoint);
217 } else if (COMBINING_ENCLOSING_KEYCAP == codepoint) {
218 add = KEYCAP_COMBINEABLE.contains(previous) || previous == VARIATION_16;
219 } else if (SYMBOLIZE.contains(previous)) {
220 add = codepoint == VARIATION_16;
221 } else if (REGIONAL_INDICATORS.contains(previous) && REGIONAL_INDICATORS.contains(codepoint)) {
222 add = codepoints.size() == 1;
223 } else if (previous == VARIATION_16) {
224 add = isMerger(codepoint);
225 } else if (FITZPATRICK.contains(previous)) {
226 add = codepoint == ZWJ;
227 } else if (ZWJ == previous) {
228 add = EMOJIS.contains(codepoint);
229 } else if (isMerger(codepoint)) {
230 add = true;
231 } else if (codepoint == VARIATION_16 && EMOJIS.contains(previous)) {
232 add = true;
233 }
234 }
235 if (add) {
236 codepoints.add(codepoint);
237 return true;
238 } else {
239 return false;
240 }
241 }
242
243 private static boolean isMerger(int codepoint) {
244 return codepoint == ZWJ || FITZPATRICK.contains(codepoint);
245 }
246
247 public Symbol build() {
248 if (codepoints.size() > 0 && SYMBOLIZE.contains(codepoints.get(codepoints.size() - 1))) {
249 return new Other(codepoints);
250 } else if (codepoints.size() > 1 && KEYCAP_COMBINEABLE.contains(codepoints.get(0)) && codepoints.get(codepoints.size() - 1) != COMBINING_ENCLOSING_KEYCAP) {
251 return new Other(codepoints);
252 }
253 return codepoints.size() == 0 ? new Other(codepoints): new Emoji(codepoints);
254 }
255 }
256
257 public static class UnicodeBlocks implements UnicodeSet {
258 final UnicodeSet[] unicodeSets;
259
260 public UnicodeBlocks(UnicodeSet... sets) {
261 this.unicodeSets = sets;
262 }
263
264 @Override
265 public boolean contains(int codepoint) {
266 for(UnicodeSet unicodeSet : unicodeSets) {
267 if (unicodeSet.contains(codepoint)) {
268 return true;
269 }
270 }
271 return false;
272 }
273 }
274
/** A set of Unicode code points that can be queried for membership. */
public interface UnicodeSet {
    /**
     * @param codepoint the Unicode code point to test
     * @return whether this set contains {@code codepoint}
     */
    boolean contains(int codepoint);
}
278
279 public static class UnicodeList implements UnicodeSet {
280
281 private final List<Integer> list;
282
283 public UnicodeList(Integer... codes) {
284 this.list = Arrays.asList(codes);
285 }
286
287 @Override
288 public boolean contains(int codepoint) {
289 return this.list.contains(codepoint);
290 }
291 }
292
293
294 public static class UnicodeRange implements UnicodeSet {
295
296 private final int lower;
297 private final int upper;
298
299 UnicodeRange(int lower, int upper) {
300 this.lower = lower;
301 this.upper = upper;
302 }
303
304 public boolean contains(int codePoint) {
305 return codePoint >= lower && codePoint <= upper;
306 }
307 }
308}