1package eu.siacs.conversations.utils;
2
3import java.util.ArrayList;
4import java.util.Arrays;
5import java.util.List;
6
/**
 * Classifies strings as emoji / non-emoji by grouping their Unicode code points
 * into symbols (single emoji, keycap sequences, flag sequences, ZWJ sequences,
 * skin-tone modified emoji, ...).
 */
public class Emoticons {

    // Unicode blocks whose code points are treated as emoji on their own.
    private static final UnicodeRange MISC_SYMBOLS_AND_PICTOGRAPHS = new UnicodeRange(0x1F300, 0x1F5FF);
    private static final UnicodeRange SUPPLEMENTAL_SYMBOLS = new UnicodeRange(0x1F900, 0x1F9FF);
    private static final UnicodeRange EMOTICONS = new UnicodeRange(0x1F600, 0x1F64F);
    private static final UnicodeRange TRANSPORT_SYMBOLS = new UnicodeRange(0x1F680, 0x1F6FF);
    private static final UnicodeRange MISC_SYMBOLS = new UnicodeRange(0x2600, 0x26FF);
    private static final UnicodeRange DINGBATS = new UnicodeRange(0x2700, 0x27BF);
    private static final UnicodeRange ENCLOSED_ALPHANUMERIC_SUPPLEMENT = new UnicodeRange(0x1F100, 0x1F1FF);
    private static final UnicodeRange ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = new UnicodeRange(0x1F200, 0x1F2FF);
    private static final UnicodeRange REGIONAL_INDICATORS = new UnicodeRange(0x1F1E6, 0x1F1FF);
    private static final UnicodeRange GEOMETRIC_SHAPES = new UnicodeRange(0x25A0, 0x25FF);
    private static final UnicodeRange LATIN_SUPPLEMENT = new UnicodeRange(0x80, 0xFF);
    private static final UnicodeRange MISC_TECHNICAL = new UnicodeRange(0x2300, 0x23FF);
    private static final UnicodeRange TAGS = new UnicodeRange(0xE0020, 0xE007F);
    private static final UnicodeList CJK_SYMBOLS_AND_PUNCTUATION = new UnicodeList(0x3030, 0x303D);
    private static final UnicodeList LETTERLIKE_SYMBOLS = new UnicodeList(0x2122, 0x2139);

    /** '#', '*' and the digits 0-9: only emoji when completed by the enclosing keycap. */
    private static final UnicodeBlocks KEYCAP_COMBINEABLE = new UnicodeBlocks(
            new UnicodeList(0x23),
            new UnicodeList(0x2A),
            new UnicodeRange(0x30, 0x39));

    /** Code points that are plain text by default and only become emoji when followed by VARIATION_16. */
    private static final UnicodeBlocks SYMBOLIZE = new UnicodeBlocks(
            GEOMETRIC_SHAPES,
            LATIN_SUPPLEMENT,
            CJK_SYMBOLS_AND_PUNCTUATION,
            LETTERLIKE_SYMBOLS,
            KEYCAP_COMBINEABLE);

    /** Code points that may start (or continue, after a ZWJ) an emoji. */
    private static final UnicodeBlocks EMOJIS = new UnicodeBlocks(
            MISC_SYMBOLS_AND_PICTOGRAPHS,
            SUPPLEMENTAL_SYMBOLS,
            EMOTICONS,
            TRANSPORT_SYMBOLS,
            MISC_SYMBOLS,
            DINGBATS,
            ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
            ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
            MISC_TECHNICAL);

    private static final int ZWJ = 0x200D;
    private static final int VARIATION_16 = 0xFE0F;
    private static final int COMBINING_ENCLOSING_KEYCAP = 0x20E3;
    private static final int BLACK_FLAG = 0x1F3F4;
    private static final UnicodeRange FITZPATRICK = new UnicodeRange(0x1F3FB, 0x1F3FF);

    /**
     * Splits {@code input} into a list of symbols, one entry per recognised emoji
     * sequence and one {@link Symbol#NON_EMOJI} entry per code point (or sequence)
     * that is not an emoji.
     *
     * @param input text to analyse; {@code null} is treated like the empty string
     * @return the symbols in input order; empty for {@code null} or empty input
     */
    private static List<Symbol> parse(String input) {
        List<Symbol> symbols = new ArrayList<>();
        if (input == null) {
            return symbols;
        }
        Builder builder = new Builder();
        for (int cp, i = 0; i < input.length(); i += Character.charCount(cp)) {
            cp = input.codePointAt(i);
            if (builder.offer(cp)) {
                continue;
            }
            // cp does not extend the current symbol: close it and start a new one.
            if (!builder.isEmpty()) {
                symbols.add(builder.build());
                builder = new Builder();
            }
            // A code point that cannot even start a symbol must still be recorded;
            // otherwise "<emoji>a<emoji>" would look like two emoji and nothing else.
            if (!builder.offer(cp)) {
                symbols.add(Symbol.NON_EMOJI);
            }
        }
        if (!builder.isEmpty()) {
            symbols.add(builder.build());
        }
        return symbols;
    }

    /**
     * Tells whether {@code input} consists of exactly one emoji (which may be a
     * multi code point sequence such as a flag or a keycap).
     *
     * @param input text to test; {@code null} yields {@code false}
     * @return {@code true} if the whole input is a single emoji
     */
    public static boolean isEmoji(String input) {
        List<Symbol> symbols = parse(input);
        return symbols.size() == 1 && symbols.get(0) == Symbol.EMOJI;
    }

    /**
     * Tells whether {@code input} is a non-empty run of emoji without any other
     * characters in between.
     *
     * @param input text to test; {@code null} or empty yields {@code false}
     * @return {@code true} if there is at least one symbol and all symbols are emoji
     */
    public static boolean isOnlyEmoji(String input) {
        List<Symbol> symbols = parse(input);
        for (Symbol symbol : symbols) {
            if (symbol == Symbol.NON_EMOJI) {
                return false;
            }
        }
        return !symbols.isEmpty();
    }

    private enum Symbol {
        EMOJI, NON_EMOJI
    }

    /** Accumulates the code points of one symbol and decides whether it is an emoji. */
    private static class Builder {
        private final List<Integer> codepoints = new ArrayList<>();

        boolean isEmpty() {
            return codepoints.isEmpty();
        }

        /**
         * Tries to append {@code codepoint} to the symbol under construction.
         *
         * @return {@code true} if the code point was accepted, {@code false} if it
         *         does not belong to this symbol (the builder is left unchanged)
         */
        public boolean offer(int codepoint) {
            boolean add = false;
            if (codepoints.isEmpty()) {
                if (SYMBOLIZE.contains(codepoint)) {
                    add = true;
                } else if (REGIONAL_INDICATORS.contains(codepoint)) {
                    add = true;
                } else if (EMOJIS.contains(codepoint) && !FITZPATRICK.contains(codepoint) && codepoint != ZWJ) {
                    add = true;
                }
            } else {
                int first = codepoints.get(0);
                int previous = codepoints.get(codepoints.size() - 1);
                if (first == BLACK_FLAG && (TAGS.contains(codepoint) || TAGS.contains(previous))) {
                    // Sub-region flag: black flag followed by tag characters only.
                    // A black flag that is NOT followed by tags falls through to the
                    // generic rules so ZWJ sequences (e.g. the pirate flag) still merge.
                    add = TAGS.contains(codepoint);
                } else if (COMBINING_ENCLOSING_KEYCAP == codepoint) {
                    add = KEYCAP_COMBINEABLE.contains(previous) || previous == VARIATION_16;
                } else if (SYMBOLIZE.contains(previous)) {
                    add = codepoint == VARIATION_16;
                } else if (REGIONAL_INDICATORS.contains(previous) && REGIONAL_INDICATORS.contains(codepoint)) {
                    // A flag is exactly two regional indicators.
                    add = codepoints.size() == 1;
                } else if (previous == VARIATION_16) {
                    add = isMerger(codepoint);
                } else if (FITZPATRICK.contains(previous)) {
                    add = codepoint == ZWJ;
                } else if (ZWJ == previous) {
                    add = EMOJIS.contains(codepoint);
                } else if (isMerger(codepoint)) {
                    add = true;
                } else if (codepoint == VARIATION_16 && EMOJIS.contains(previous)) {
                    add = true;
                }
            }
            if (add) {
                codepoints.add(codepoint);
            }
            return add;
        }

        /** A code point that glues onto the preceding emoji: ZWJ or a skin-tone modifier. */
        private static boolean isMerger(int codepoint) {
            return codepoint == ZWJ || FITZPATRICK.contains(codepoint);
        }

        /**
         * Classifies the collected code points.
         *
         * @return {@link Symbol#EMOJI} for a complete emoji sequence, otherwise
         *         {@link Symbol#NON_EMOJI} (including for an empty builder)
         */
        public Symbol build() {
            if (codepoints.isEmpty()) {
                return Symbol.NON_EMOJI;
            }
            int first = codepoints.get(0);
            int last = codepoints.get(codepoints.size() - 1);
            if (SYMBOLIZE.contains(last)) {
                // Text-default symbol without the emoji variation selector.
                return Symbol.NON_EMOJI;
            }
            if (codepoints.size() > 1 && KEYCAP_COMBINEABLE.contains(first) && last != COMBINING_ENCLOSING_KEYCAP) {
                // Keycap base that was never closed by the enclosing keycap.
                return Symbol.NON_EMOJI;
            }
            return Symbol.EMOJI;
        }
    }

    /** Union of several {@link UnicodeSet}s. */
    public static class UnicodeBlocks implements UnicodeSet {
        final UnicodeSet[] unicodeSets;

        public UnicodeBlocks(UnicodeSet... sets) {
            this.unicodeSets = sets;
        }

        @Override
        public boolean contains(int codepoint) {
            for (UnicodeSet unicodeSet : unicodeSets) {
                if (unicodeSet.contains(codepoint)) {
                    return true;
                }
            }
            return false;
        }
    }

    /** A set of Unicode code points. */
    public interface UnicodeSet {
        boolean contains(int codepoint);
    }

    /** Explicit enumeration of individual code points. */
    public static class UnicodeList implements UnicodeSet {

        private final List<Integer> list;

        public UnicodeList(Integer... codes) {
            this.list = Arrays.asList(codes);
        }

        @Override
        public boolean contains(int codepoint) {
            return this.list.contains(codepoint);
        }
    }

    /** Inclusive range of code points {@code [lower, upper]}. */
    public static class UnicodeRange implements UnicodeSet {

        private final int lower;
        private final int upper;

        UnicodeRange(int lower, int upper) {
            this.lower = lower;
            this.upper = upper;
        }

        @Override
        public boolean contains(int codePoint) {
            return codePoint >= lower && codePoint <= upper;
        }
    }
}