1/*
2 * Copyright (c) 2017, Daniel Gultsch All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without modification,
5 * are permitted provided that the following conditions are met:
6 *
7 * 1. Redistributions of source code must retain the above copyright notice, this
8 * list of conditions and the following disclaimer.
9 *
10 * 2. Redistributions in binary form must reproduce the above copyright notice,
11 * this list of conditions and the following disclaimer in the documentation and/or
12 * other materials provided with the distribution.
13 *
14 * 3. Neither the name of the copyright holder nor the names of its contributors
15 * may be used to endorse or promote products derived from this software without
16 * specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
22 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30package eu.siacs.conversations.utils;
31
32import java.util.ArrayList;
33import java.util.Arrays;
34import java.util.List;
35import java.util.regex.Pattern;
36
/**
 * Recognises emoji in text by grouping Unicode code points into symbols.
 *
 * <p>The input is walked code point by code point. A {@link Builder} greedily collects code
 * points that belong to the same symbol (base emoji, variation selector, skin tone modifier,
 * zero width joiner sequences, regional indicator pairs, keycaps and black-flag tag sequences).
 * Every finished group is classified as either an {@link Emoji} or an {@link Other} symbol.
 *
 * <p>Only the code point ranges declared below are known to this class; anything else,
 * including whitespace and plain text, is treated as a non-emoji symbol.
 */
public class Emoticons {

    // Blocks whose members are treated as emoji.
    private static final UnicodeRange MISC_SYMBOLS_AND_PICTOGRAPHS = new UnicodeRange(0x1F300, 0x1F5FF);
    private static final UnicodeRange SUPPLEMENTAL_SYMBOLS = new UnicodeRange(0x1F900, 0x1F9FF);
    private static final UnicodeRange EMOTICONS = new UnicodeRange(0x1F600, 0x1F64F);
    private static final UnicodeRange TRANSPORT_SYMBOLS = new UnicodeRange(0x1F680, 0x1F6FF);
    private static final UnicodeRange MISC_SYMBOLS = new UnicodeRange(0x2600, 0x26FF);
    private static final UnicodeRange DINGBATS = new UnicodeRange(0x2700, 0x27BF);
    private static final UnicodeRange ENCLOSED_ALPHANUMERIC_SUPPLEMENT = new UnicodeRange(0x1F100, 0x1F1FF);
    private static final UnicodeRange ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = new UnicodeRange(0x1F200, 0x1F2FF);
    private static final UnicodeRange REGIONAL_INDICATORS = new UnicodeRange(0x1F1E6, 0x1F1FF);

    // Blocks whose members only become emoji when followed by a variation selector (or keycap).
    private static final UnicodeRange GEOMETRIC_SHAPES = new UnicodeRange(0x25A0, 0x25FF);
    private static final UnicodeRange LATIN_SUPPLEMENT = new UnicodeRange(0x80, 0xFF);
    private static final UnicodeRange MISC_TECHNICAL = new UnicodeRange(0x2300, 0x23FF);
    private static final UnicodeRange TAGS = new UnicodeRange(0xE0020, 0xE007F);
    private static final UnicodeList CJK_SYMBOLS_AND_PUNCTUATION = new UnicodeList(0x3030, 0x303D);
    private static final UnicodeList LETTERLIKE_SYMBOLS = new UnicodeList(0x2122, 0x2139);

    // '#', '*' and the ASCII digits: the characters that may carry a combining enclosing keycap.
    private static final UnicodeBlocks KEYCAP_COMBINEABLE = new UnicodeBlocks(
            new UnicodeList(0x23),
            new UnicodeList(0x2A),
            new UnicodeRange(0x30, 0x39));

    /** Code points that are plain text on their own and need a trailing selector to count as emoji. */
    private static final UnicodeBlocks SYMBOLIZE = new UnicodeBlocks(
            GEOMETRIC_SHAPES,
            LATIN_SUPPLEMENT,
            CJK_SYMBOLS_AND_PUNCTUATION,
            LETTERLIKE_SYMBOLS,
            KEYCAP_COMBINEABLE);

    /** Code points that count as emoji on their own. */
    private static final UnicodeBlocks EMOJIS = new UnicodeBlocks(
            MISC_SYMBOLS_AND_PICTOGRAPHS,
            SUPPLEMENTAL_SYMBOLS,
            EMOTICONS,
            TRANSPORT_SYMBOLS,
            MISC_SYMBOLS,
            DINGBATS,
            ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
            ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
            MISC_TECHNICAL);

    private static final int ZWJ = 0x200D;
    private static final int VARIATION_16 = 0xFE0F;
    private static final int COMBINING_ENCLOSING_KEYCAP = 0x20E3;
    private static final int BLACK_FLAG = 0x1F3F4;
    private static final UnicodeRange FITZPATRICK = new UnicodeRange(0x1F3FB, 0x1F3FF);

    /**
     * Splits {@code input} into consecutive symbols.
     *
     * <p>Every code point of the input ends up in exactly one returned symbol: code points that
     * no builder accepts are reported as a single-code-point {@link Other} instead of being
     * dropped, so text between two emoji is never lost.
     *
     * @param input the text to split; must not be {@code null}
     * @return the symbols in input order; empty for an empty input
     */
    private static List<Symbol> parse(String input) {
        final List<Symbol> symbols = new ArrayList<>();
        Builder builder = new Builder();
        for (int cp, i = 0; i < input.length(); i += Character.charCount(cp)) {
            cp = input.codePointAt(i);
            if (builder.offer(cp)) {
                continue;
            }
            if (!builder.isEmpty()) {
                // The code point does not continue the current symbol: finish it and retry
                // the code point as the start of a new one.
                symbols.add(builder.build());
                builder = new Builder();
                if (builder.offer(cp)) {
                    continue;
                }
            }
            // Not even a fresh builder accepts it, so it is not part of any emoji.
            symbols.add(new Other(Arrays.asList(cp)));
        }
        if (!builder.isEmpty()) {
            symbols.add(builder.build());
        }
        return symbols;
    }

    /**
     * Builds a pattern that matches every emoji occurring in {@code input}.
     *
     * <p>Each distinct emoji becomes one quoted alternative. Alternatives are ordered longest
     * first because regex alternation takes the first alternative that matches; otherwise a
     * base emoji listed before a longer sequence starting with it would shadow that sequence.
     * If the input contains no emoji the resulting pattern is empty.
     *
     * @param input the text to scan; must not be {@code null}
     * @return the compiled pattern
     */
    public static Pattern generatePattern(CharSequence input) {
        final List<String> alternatives = new ArrayList<>();
        for (Symbol symbol : parse(input.toString())) {
            if (!(symbol instanceof Emoji)) {
                continue;
            }
            final String emoji = symbol.toString();
            if (alternatives.contains(emoji)) {
                continue;
            }
            // Ordered insert: keep the list sorted by descending length, preserving the order
            // of first appearance among alternatives of equal length.
            int position = 0;
            while (position < alternatives.size()
                    && alternatives.get(position).length() >= emoji.length()) {
                position++;
            }
            alternatives.add(position, emoji);
        }
        final StringBuilder pattern = new StringBuilder();
        for (String emoji : alternatives) {
            if (pattern.length() != 0) {
                pattern.append('|');
            }
            pattern.append(Pattern.quote(emoji));
        }
        return Pattern.compile(pattern.toString());
    }

    /**
     * Tells whether {@code input} consists of exactly one emoji.
     *
     * @param input the text to check; must not be {@code null}
     * @return {@code true} if the whole input parses as a single emoji symbol
     */
    public static boolean isEmoji(String input) {
        final List<Symbol> symbols = parse(input);
        return symbols.size() == 1 && symbols.get(0).isEmoji();
    }

    /**
     * Tells whether {@code input} consists of one or more emoji and nothing else.
     *
     * <p>Whitespace counts as a non-emoji symbol; strip it beforehand if it should be ignored.
     *
     * @param input the text to check; must not be {@code null}
     * @return {@code true} if the input is non-empty and every symbol in it is an emoji
     */
    public static boolean isOnlyEmoji(String input) {
        final List<Symbol> symbols = parse(input);
        if (symbols.isEmpty()) {
            return false;
        }
        for (Symbol symbol : symbols) {
            if (!symbol.isEmoji()) {
                return false;
            }
        }
        return true;
    }

    /** A run of code points that belong together, stored as the string they encode. */
    private static abstract class Symbol {

        private final String value;

        public Symbol(List<Integer> codepoints) {
            final StringBuilder builder = new StringBuilder();
            for (Integer codepoint : codepoints) {
                builder.appendCodePoint(codepoint);
            }
            this.value = builder.toString();
        }

        abstract boolean isEmoji();

        /** Returns the text of this symbol. */
        @Override
        public String toString() {
            return value;
        }
    }

    /** A symbol that is an emoji. */
    public static class Emoji extends Symbol {

        public Emoji(List<Integer> codepoints) {
            super(codepoints);
        }

        @Override
        boolean isEmoji() {
            return true;
        }
    }

    /** A symbol that is not an emoji. */
    public static class Other extends Symbol {

        public Other(List<Integer> codepoints) {
            super(codepoints);
        }

        @Override
        boolean isEmoji() {
            return false;
        }
    }

    /** Accumulates the code points of one symbol and classifies the result. */
    private static class Builder {
        private final List<Integer> codepoints = new ArrayList<>();

        /** Returns {@code true} while no code point has been accepted yet. */
        boolean isEmpty() {
            return codepoints.isEmpty();
        }

        /**
         * Tries to append {@code codepoint} to the symbol under construction.
         *
         * @param codepoint the next code point of the input
         * @return {@code true} if it was appended; {@code false} if it does not belong to this
         *         symbol, in which case the builder is left unchanged
         */
        public boolean offer(int codepoint) {
            boolean add = false;
            if (codepoints.isEmpty()) {
                // A symbol may start with anything we know, except bare modifiers and joiners.
                add = SYMBOLIZE.contains(codepoint)
                        || REGIONAL_INDICATORS.contains(codepoint)
                        || (EMOJIS.contains(codepoint)
                                && !FITZPATRICK.contains(codepoint)
                                && codepoint != ZWJ);
            } else {
                final int first = codepoints.get(0);
                final int previous = codepoints.get(codepoints.size() - 1);
                if (TAGS.contains(codepoint)) {
                    // Tag characters are only valid directly after a black flag or after
                    // another tag of the same black-flag sequence.
                    add = first == BLACK_FLAG
                            && (codepoints.size() == 1 || TAGS.contains(previous));
                } else if (TAGS.contains(previous)) {
                    // Nothing but further tags may follow inside a tag sequence.
                    add = false;
                } else if (COMBINING_ENCLOSING_KEYCAP == codepoint) {
                    add = KEYCAP_COMBINEABLE.contains(previous) || previous == VARIATION_16;
                } else if (SYMBOLIZE.contains(previous)) {
                    add = codepoint == VARIATION_16;
                } else if (REGIONAL_INDICATORS.contains(previous)
                        && REGIONAL_INDICATORS.contains(codepoint)) {
                    // Regional indicators combine in pairs only.
                    add = codepoints.size() == 1;
                } else if (previous == VARIATION_16) {
                    add = isMerger(codepoint);
                } else if (FITZPATRICK.contains(previous)) {
                    add = codepoint == ZWJ;
                } else if (ZWJ == previous) {
                    add = EMOJIS.contains(codepoint);
                } else if (isMerger(codepoint)) {
                    add = true;
                } else if (codepoint == VARIATION_16 && EMOJIS.contains(previous)) {
                    add = true;
                }
            }
            if (add) {
                codepoints.add(codepoint);
            }
            return add;
        }

        /** Returns whether {@code codepoint} attaches to a preceding emoji (ZWJ or skin tone). */
        private static boolean isMerger(int codepoint) {
            return codepoint == ZWJ || FITZPATRICK.contains(codepoint);
        }

        /**
         * Classifies the collected code points.
         *
         * @return an {@link Other} if nothing was collected, if the last code point still needs
         *         a selector, or if a keycap base is not terminated by the enclosing keycap;
         *         an {@link Emoji} otherwise
         */
        public Symbol build() {
            if (codepoints.isEmpty()) {
                return new Other(codepoints);
            }
            final int first = codepoints.get(0);
            final int last = codepoints.get(codepoints.size() - 1);
            if (SYMBOLIZE.contains(last)) {
                return new Other(codepoints);
            }
            if (codepoints.size() > 1
                    && KEYCAP_COMBINEABLE.contains(first)
                    && last != COMBINING_ENCLOSING_KEYCAP) {
                return new Other(codepoints);
            }
            return new Emoji(codepoints);
        }
    }

    /** The union of several code point sets. */
    public static class UnicodeBlocks implements UnicodeSet {
        final UnicodeSet[] unicodeSets;

        public UnicodeBlocks(UnicodeSet... sets) {
            this.unicodeSets = sets;
        }

        @Override
        public boolean contains(int codepoint) {
            for (UnicodeSet unicodeSet : unicodeSets) {
                if (unicodeSet.contains(codepoint)) {
                    return true;
                }
            }
            return false;
        }
    }

    /** A set of Unicode code points. */
    public interface UnicodeSet {
        /** Returns whether {@code codepoint} is a member of this set. */
        boolean contains(int codepoint);
    }

    /** A set made of individually listed code points. */
    public static class UnicodeList implements UnicodeSet {

        private final List<Integer> list;

        public UnicodeList(Integer... codes) {
            this.list = Arrays.asList(codes);
        }

        @Override
        public boolean contains(int codepoint) {
            return this.list.contains(codepoint);
        }
    }

    /** A contiguous range of code points; both bounds are inclusive. */
    public static class UnicodeRange implements UnicodeSet {

        private final int lower;
        private final int upper;

        UnicodeRange(int lower, int upper) {
            this.lower = lower;
            this.upper = upper;
        }

        @Override
        public boolean contains(int codePoint) {
            return codePoint >= lower && codePoint <= upper;
        }
    }
}