Emoticons.java

  1/*
  2 * Copyright (c) 2017, Daniel Gultsch All rights reserved.
  3 *
  4 * Redistribution and use in source and binary forms, with or without modification,
  5 * are permitted provided that the following conditions are met:
  6 *
  7 * 1. Redistributions of source code must retain the above copyright notice, this
  8 * list of conditions and the following disclaimer.
  9 *
 10 * 2. Redistributions in binary form must reproduce the above copyright notice,
 11 * this list of conditions and the following disclaimer in the documentation and/or
 12 * other materials provided with the distribution.
 13 *
 14 * 3. Neither the name of the copyright holder nor the names of its contributors
 15 * may be used to endorse or promote products derived from this software without
 16 * specific prior written permission.
 17 *
 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 21 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 22 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 25 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 28 */
 29
 30package eu.siacs.conversations.utils;
 31
import android.util.LruCache;

import androidx.annotation.NonNull;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.regex.Pattern;
 40
 41public class Emoticons {
 42
 43    private static final UnicodeRange MISC_SYMBOLS_AND_PICTOGRAPHS = new UnicodeRange(0x1F300, 0x1F5FF);
 44    private static final UnicodeRange SUPPLEMENTAL_SYMBOLS = new UnicodeRange(0x1F900, 0x1F9FF);
 45    private static final UnicodeRange EMOTICONS = new UnicodeRange(0x1F600, 0x1F64F);
 46    private static final UnicodeRange TRANSPORT_SYMBOLS = new UnicodeRange(0x1F680, 0x1F6FF);
 47    private static final UnicodeRange MISC_SYMBOLS = new UnicodeRange(0x2600, 0x26FF);
 48    private static final UnicodeRange DINGBATS = new UnicodeRange(0x2700, 0x27BF);
 49    private static final UnicodeRange ENCLOSED_ALPHANUMERIC_SUPPLEMENT = new UnicodeRange(0x1F100, 0x1F1FF);
 50    private static final UnicodeRange ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = new UnicodeRange(0x1F200, 0x1F2FF);
 51    private static final UnicodeRange REGIONAL_INDICATORS = new UnicodeRange(0x1F1E6, 0x1F1FF);
 52    private static final UnicodeRange GEOMETRIC_SHAPES = new UnicodeRange(0x25A0, 0x25FF);
 53    private static final UnicodeRange LATIN_SUPPLEMENT = new UnicodeRange(0x80, 0xFF);
 54    private static final UnicodeRange MISC_TECHNICAL = new UnicodeRange(0x2300, 0x23FF);
 55    private static final UnicodeRange TAGS = new UnicodeRange(0xE0020, 0xE007F);
 56    private static final UnicodeList CYK_SYMBOLS_AND_PUNCTUATION = new UnicodeList(0x3030, 0x303D);
 57    private static final UnicodeList LETTERLIKE_SYMBOLS = new UnicodeList(0x2122, 0x2139);
 58
 59    private static final UnicodeBlocks KEYCAP_COMBINEABLE = new UnicodeBlocks(new UnicodeList(0x23), new UnicodeList(0x2A), new UnicodeRange(0x30, 0x39));
 60
 61    private static final UnicodeBlocks SYMBOLIZE = new UnicodeBlocks(
 62            GEOMETRIC_SHAPES,
 63            LATIN_SUPPLEMENT,
 64            CYK_SYMBOLS_AND_PUNCTUATION,
 65            LETTERLIKE_SYMBOLS,
 66            KEYCAP_COMBINEABLE);
 67    private static final UnicodeBlocks EMOJIS = new UnicodeBlocks(
 68            MISC_SYMBOLS_AND_PICTOGRAPHS,
 69            SUPPLEMENTAL_SYMBOLS,
 70            EMOTICONS,
 71            TRANSPORT_SYMBOLS,
 72            MISC_SYMBOLS,
 73            DINGBATS,
 74            ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
 75            ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
 76            MISC_TECHNICAL);
 77
 78    private static final int MAX_EMOIJS = 42;
 79
 80    private static final int ZWJ = 0x200D;
 81    private static final int VARIATION_16 = 0xFE0F;
 82    private static final int COMBINING_ENCLOSING_KEYCAP = 0x20E3;
 83    private static final int BLACK_FLAG = 0x1F3F4;
 84    private static final UnicodeRange FITZPATRICK = new UnicodeRange(0x1F3FB, 0x1F3FF);
 85
 86    private static final LruCache<CharSequence, Pattern> CACHE = new LruCache<>(256);
 87
 88    private static List<Symbol> parse(String input) {
 89        List<Symbol> symbols = new ArrayList<>();
 90        Builder builder = new Builder();
 91        boolean needsFinalBuild = false;
 92        for (int cp, i = 0; i < input.length(); i += Character.charCount(cp)) {
 93            cp = input.codePointAt(i);
 94            if (builder.offer(cp)) {
 95                needsFinalBuild = true;
 96            } else {
 97                symbols.add(builder.build());
 98                builder = new Builder();
 99                if (builder.offer(cp)) {
100                    needsFinalBuild = true;
101                }
102            }
103        }
104        if (needsFinalBuild) {
105            symbols.add(builder.build());
106        }
107        return symbols;
108    }
109
110    public static Pattern getEmojiPattern(final CharSequence input) {
111        Pattern pattern = CACHE.get(input);
112        if (pattern == null) {
113            pattern = generatePattern(input);
114            CACHE.put(input, pattern);
115        }
116        return pattern;
117    }
118
119    private static Pattern generatePattern(CharSequence input) {
120        final HashSet<String> emojis = new HashSet<>();
121        int i = 0;
122        for (final Symbol symbol : parse(input.toString())) {
123            if (symbol instanceof Emoji) {
124                emojis.add(symbol.toString());
125                if (++i >= MAX_EMOIJS) {
126                    return Pattern.compile("");
127                }
128            }
129        }
130        final StringBuilder pattern = new StringBuilder();
131        for (String emoji : emojis) {
132            if (pattern.length() != 0) {
133                pattern.append('|');
134            }
135            pattern.append(Pattern.quote(emoji));
136        }
137        return Pattern.compile(pattern.toString());
138    }
139
140    public static boolean isEmoji(String input) {
141        List<Symbol> symbols = parse(input);
142        return symbols.size() == 1 && symbols.get(0).isEmoji();
143    }
144
145    public static boolean isOnlyEmoji(String input) {
146        List<Symbol> symbols = parse(input);
147        for (Symbol symbol : symbols) {
148            if (!symbol.isEmoji()) {
149                return false;
150            }
151        }
152        return symbols.size() > 0;
153    }
154
155    private static abstract class Symbol {
156
157        private final String value;
158
159        Symbol(List<Integer> codepoints) {
160            final StringBuilder builder = new StringBuilder();
161            for (final Integer codepoint : codepoints) {
162                builder.appendCodePoint(codepoint);
163            }
164            this.value = builder.toString();
165        }
166
167        abstract boolean isEmoji();
168
169        @NonNull
170        @Override
171        public String toString() {
172            return value;
173        }
174    }
175
176    public static class Emoji extends Symbol {
177
178        Emoji(List<Integer> codepoints) {
179            super(codepoints);
180        }
181
182        @Override
183        boolean isEmoji() {
184            return true;
185        }
186    }
187
188    public static class Other extends Symbol {
189
190        public Other(List<Integer> codepoints) {
191            super(codepoints);
192        }
193
194        @Override
195        boolean isEmoji() {
196            return false;
197        }
198    }
199
200    private static class Builder {
201        private final List<Integer> codepoints = new ArrayList<>();
202
203
204        public boolean offer(int codepoint) {
205            boolean add = false;
206            if (this.codepoints.size() == 0) {
207                if (SYMBOLIZE.contains(codepoint)) {
208                    add = true;
209                } else if (REGIONAL_INDICATORS.contains(codepoint)) {
210                    add = true;
211                } else if (EMOJIS.contains(codepoint) && !FITZPATRICK.contains(codepoint) && codepoint != ZWJ) {
212                    add = true;
213                }
214            } else {
215                int previous = codepoints.get(codepoints.size() - 1);
216                if (codepoints.get(0) == BLACK_FLAG) {
217                    add = TAGS.contains(codepoint);
218                } else if (COMBINING_ENCLOSING_KEYCAP == codepoint) {
219                    add = KEYCAP_COMBINEABLE.contains(previous) || previous == VARIATION_16;
220                } else if (SYMBOLIZE.contains(previous)) {
221                    add = codepoint == VARIATION_16;
222                } else if (REGIONAL_INDICATORS.contains(previous) && REGIONAL_INDICATORS.contains(codepoint)) {
223                    add = codepoints.size() == 1;
224                } else if (previous == VARIATION_16) {
225                    add = isMerger(codepoint) || codepoint == VARIATION_16;
226                } else if (FITZPATRICK.contains(previous)) {
227                    add = codepoint == ZWJ;
228                } else if (ZWJ == previous) {
229                    add = EMOJIS.contains(codepoint);
230                } else if (isMerger(codepoint)) {
231                    add = true;
232                } else if (codepoint == VARIATION_16 && EMOJIS.contains(previous)) {
233                    add = true;
234                }
235            }
236            if (add) {
237                codepoints.add(codepoint);
238                return true;
239            } else {
240                return false;
241            }
242        }
243
244        private static boolean isMerger(int codepoint) {
245            return codepoint == ZWJ || FITZPATRICK.contains(codepoint);
246        }
247
248        public Symbol build() {
249            if (codepoints.size() > 0 && SYMBOLIZE.contains(codepoints.get(codepoints.size() - 1))) {
250                return new Other(codepoints);
251            } else if (codepoints.size() > 1 && KEYCAP_COMBINEABLE.contains(codepoints.get(0)) && codepoints.get(codepoints.size() - 1) != COMBINING_ENCLOSING_KEYCAP) {
252                return new Other(codepoints);
253            }
254            return codepoints.size() == 0 ? new Other(codepoints) : new Emoji(codepoints);
255        }
256    }
257
258    public static class UnicodeBlocks implements UnicodeSet {
259        final UnicodeSet[] unicodeSets;
260
261        UnicodeBlocks(final UnicodeSet... sets) {
262            this.unicodeSets = sets;
263        }
264
265        @Override
266        public boolean contains(int codepoint) {
267            for (UnicodeSet unicodeSet : unicodeSets) {
268                if (unicodeSet.contains(codepoint)) {
269                    return true;
270                }
271            }
272            return false;
273        }
274    }
275
276    public interface UnicodeSet {
277        boolean contains(int codepoint);
278    }
279
280    public static class UnicodeList implements UnicodeSet {
281
282        private final List<Integer> list;
283
284        UnicodeList(final Integer... codes) {
285            this.list = Arrays.asList(codes);
286        }
287
288        @Override
289        public boolean contains(int codepoint) {
290            return this.list.contains(codepoint);
291        }
292    }
293
294
295    public static class UnicodeRange implements UnicodeSet {
296
297        private final int lower;
298        private final int upper;
299
300        UnicodeRange(int lower, int upper) {
301            this.lower = lower;
302            this.upper = upper;
303        }
304
305        public boolean contains(int codePoint) {
306            return codePoint >= lower && codePoint <= upper;
307        }
308    }
309}