Emoticons.java

  1/*
  2 * Copyright (c) 2017, Daniel Gultsch All rights reserved.
  3 *
  4 * Redistribution and use in source and binary forms, with or without modification,
  5 * are permitted provided that the following conditions are met:
  6 *
  7 * 1. Redistributions of source code must retain the above copyright notice, this
  8 * list of conditions and the following disclaimer.
  9 *
 10 * 2. Redistributions in binary form must reproduce the above copyright notice,
 11 * this list of conditions and the following disclaimer in the documentation and/or
 12 * other materials provided with the distribution.
 13 *
 14 * 3. Neither the name of the copyright holder nor the names of its contributors
 15 * may be used to endorse or promote products derived from this software without
 16 * specific prior written permission.
 17 *
 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 21 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 22 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 25 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 28 */
 29
 30package eu.siacs.conversations.utils;
 31
import android.util.LruCache;

import androidx.annotation.NonNull;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.regex.Pattern;
 41
 42public class Emoticons {
 43
 44    private static final UnicodeRange MISC_SYMBOLS_AND_PICTOGRAPHS = new UnicodeRange(0x1F300, 0x1F5FF);
 45    private static final UnicodeRange SUPPLEMENTAL_SYMBOLS = new UnicodeRange(0x1F900, 0x1F9FF);
 46    private static final UnicodeRange EMOTICONS = new UnicodeRange(0x1F600, 0x1FAF6);
 47    //private static final UnicodeRange TRANSPORT_SYMBOLS = new UnicodeRange(0x1F680, 0x1F6FF);
 48    private static final UnicodeRange MISC_SYMBOLS = new UnicodeRange(0x2600, 0x26FF);
 49    private static final UnicodeRange DINGBATS = new UnicodeRange(0x2700, 0x27BF);
 50    private static final UnicodeRange ENCLOSED_ALPHANUMERIC_SUPPLEMENT = new UnicodeRange(0x1F100, 0x1F1FF);
 51    private static final UnicodeRange ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = new UnicodeRange(0x1F200, 0x1F2FF);
 52    private static final UnicodeRange REGIONAL_INDICATORS = new UnicodeRange(0x1F1E6, 0x1F1FF);
 53    private static final UnicodeRange GEOMETRIC_SHAPES = new UnicodeRange(0x25A0, 0x25FF);
 54    private static final UnicodeRange LATIN_SUPPLEMENT = new UnicodeRange(0x80, 0xFF);
 55    private static final UnicodeRange MISC_TECHNICAL = new UnicodeRange(0x2300, 0x23FF);
 56    private static final UnicodeRange TAGS = new UnicodeRange(0xE0020, 0xE007F);
 57    private static final UnicodeList CYK_SYMBOLS_AND_PUNCTUATION = new UnicodeList(0x3030, 0x303D);
 58    private static final UnicodeList LETTERLIKE_SYMBOLS = new UnicodeList(0x2122, 0x2139);
 59
 60    private static final UnicodeBlocks KEYCAP_COMBINEABLE = new UnicodeBlocks(new UnicodeList(0x23), new UnicodeList(0x2A), new UnicodeRange(0x30, 0x39));
 61
 62    private static final UnicodeBlocks SYMBOLIZE = new UnicodeBlocks(
 63            GEOMETRIC_SHAPES,
 64            LATIN_SUPPLEMENT,
 65            CYK_SYMBOLS_AND_PUNCTUATION,
 66            LETTERLIKE_SYMBOLS,
 67            KEYCAP_COMBINEABLE);
 68    private static final UnicodeBlocks EMOJIS = new UnicodeBlocks(
 69            MISC_SYMBOLS_AND_PICTOGRAPHS,
 70            SUPPLEMENTAL_SYMBOLS,
 71            EMOTICONS,
 72            //TRANSPORT_SYMBOLS,
 73            MISC_SYMBOLS,
 74            DINGBATS,
 75            ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
 76            ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
 77            MISC_TECHNICAL);
 78
 79    private static final int MAX_EMOIJS = 42;
 80
 81    private static final int ZWJ = 0x200D;
 82    private static final int VARIATION_16 = 0xFE0F;
 83    private static final int COMBINING_ENCLOSING_KEYCAP = 0x20E3;
 84    private static final int BLACK_FLAG = 0x1F3F4;
 85    private static final UnicodeRange FITZPATRICK = new UnicodeRange(0x1F3FB, 0x1F3FF);
 86
 87    private static final LruCache<CharSequence, Pattern> CACHE = new LruCache<>(256);
 88
 89    private static List<Symbol> parse(String input) {
 90        List<Symbol> symbols = new ArrayList<>();
 91        Builder builder = new Builder();
 92        boolean needsFinalBuild = false;
 93        for (int cp, i = 0; i < input.length(); i += Character.charCount(cp)) {
 94            cp = input.codePointAt(i);
 95            if (builder.offer(cp)) {
 96                needsFinalBuild = true;
 97            } else {
 98                symbols.add(builder.build());
 99                builder = new Builder();
100                if (builder.offer(cp)) {
101                    needsFinalBuild = true;
102                }
103            }
104        }
105        if (needsFinalBuild) {
106            symbols.add(builder.build());
107        }
108        return symbols;
109    }
110
111    public static Pattern getEmojiPattern(final CharSequence input) {
112        Pattern pattern = CACHE.get(input);
113        if (pattern == null) {
114            pattern = generatePattern(input);
115            CACHE.put(input, pattern);
116        }
117        return pattern;
118    }
119
120    private static Pattern generatePattern(CharSequence input) {
121        final HashSet<String> emojis = new HashSet<>();
122        int i = 0;
123        for (final Symbol symbol : parse(input.toString())) {
124            if (symbol instanceof Emoji) {
125                emojis.add(symbol.toString());
126                if (++i >= MAX_EMOIJS) {
127                    return Pattern.compile("");
128                }
129            }
130        }
131        final StringBuilder pattern = new StringBuilder();
132        for (String emoji : emojis) {
133            if (pattern.length() != 0) {
134                pattern.append('|');
135            }
136            pattern.append(Pattern.quote(emoji));
137        }
138        return Pattern.compile(pattern.toString());
139    }
140
141    public static boolean isEmoji(String input) {
142        List<Symbol> symbols = parse(input);
143        return symbols.size() == 1 && symbols.get(0).isEmoji();
144    }
145
146    public static boolean isOnlyEmoji(String input) {
147        List<Symbol> symbols = parse(input);
148        for (Symbol symbol : symbols) {
149            if (!symbol.isEmoji()) {
150                return false;
151            }
152        }
153        return symbols.size() > 0;
154    }
155
156    private static abstract class Symbol {
157
158        private final String value;
159
160        Symbol(List<Integer> codepoints) {
161            final StringBuilder builder = new StringBuilder();
162            for (final Integer codepoint : codepoints) {
163                builder.appendCodePoint(codepoint);
164            }
165            this.value = builder.toString();
166        }
167
168        abstract boolean isEmoji();
169
170        @NonNull
171        @Override
172        public String toString() {
173            return value;
174        }
175    }
176
177    public static class Emoji extends Symbol {
178
179        Emoji(List<Integer> codepoints) {
180            super(codepoints);
181        }
182
183        @Override
184        boolean isEmoji() {
185            return true;
186        }
187    }
188
189    public static class Other extends Symbol {
190
191        public Other(List<Integer> codepoints) {
192            super(codepoints);
193        }
194
195        @Override
196        boolean isEmoji() {
197            return false;
198        }
199    }
200
201    private static class Builder {
202        private final List<Integer> codepoints = new ArrayList<>();
203
204
205        public boolean offer(int codepoint) {
206            boolean add = false;
207            if (this.codepoints.size() == 0) {
208                if (SYMBOLIZE.contains(codepoint)) {
209                    add = true;
210                } else if (REGIONAL_INDICATORS.contains(codepoint)) {
211                    add = true;
212                } else if (EMOJIS.contains(codepoint) && !FITZPATRICK.contains(codepoint) && codepoint != ZWJ) {
213                    add = true;
214                }
215            } else {
216                int previous = codepoints.get(codepoints.size() - 1);
217                if (codepoints.get(0) == BLACK_FLAG) {
218                    add = TAGS.contains(codepoint);
219                } else if (COMBINING_ENCLOSING_KEYCAP == codepoint) {
220                    add = KEYCAP_COMBINEABLE.contains(previous) || previous == VARIATION_16;
221                } else if (SYMBOLIZE.contains(previous)) {
222                    add = codepoint == VARIATION_16;
223                } else if (REGIONAL_INDICATORS.contains(previous) && REGIONAL_INDICATORS.contains(codepoint)) {
224                    add = codepoints.size() == 1;
225                } else if (previous == VARIATION_16) {
226                    add = isMerger(codepoint) || codepoint == VARIATION_16;
227                } else if (FITZPATRICK.contains(previous)) {
228                    add = codepoint == ZWJ;
229                } else if (ZWJ == previous) {
230                    add = EMOJIS.contains(codepoint);
231                } else if (isMerger(codepoint)) {
232                    add = true;
233                } else if (codepoint == VARIATION_16 && EMOJIS.contains(previous)) {
234                    add = true;
235                }
236            }
237            if (add) {
238                codepoints.add(codepoint);
239                return true;
240            } else {
241                return false;
242            }
243        }
244
245        private static boolean isMerger(int codepoint) {
246            return codepoint == ZWJ || FITZPATRICK.contains(codepoint);
247        }
248
249        public Symbol build() {
250            if (codepoints.size() > 0 && SYMBOLIZE.contains(codepoints.get(codepoints.size() - 1))) {
251                return new Other(codepoints);
252            } else if (codepoints.size() > 1 && KEYCAP_COMBINEABLE.contains(codepoints.get(0)) && codepoints.get(codepoints.size() - 1) != COMBINING_ENCLOSING_KEYCAP) {
253                return new Other(codepoints);
254            }
255            return codepoints.size() == 0 ? new Other(codepoints) : new Emoji(codepoints);
256        }
257    }
258
259    public static class UnicodeBlocks implements UnicodeSet {
260        final UnicodeSet[] unicodeSets;
261
262        UnicodeBlocks(final UnicodeSet... sets) {
263            this.unicodeSets = sets;
264        }
265
266        @Override
267        public boolean contains(int codepoint) {
268            for (UnicodeSet unicodeSet : unicodeSets) {
269                if (unicodeSet.contains(codepoint)) {
270                    return true;
271                }
272            }
273            return false;
274        }
275    }
276
277    public interface UnicodeSet {
278        boolean contains(int codepoint);
279    }
280
281    public static class UnicodeList implements UnicodeSet {
282
283        private final List<Integer> list;
284
285        UnicodeList(final Integer... codes) {
286            this.list = Arrays.asList(codes);
287        }
288
289        @Override
290        public boolean contains(int codepoint) {
291            return this.list.contains(codepoint);
292        }
293    }
294
295
296    public static class UnicodeRange implements UnicodeSet {
297
298        private final int lower;
299        private final int upper;
300
301        UnicodeRange(int lower, int upper) {
302            this.lower = lower;
303            this.upper = upper;
304        }
305
306        public boolean contains(int codePoint) {
307            return codePoint >= lower && codePoint <= upper;
308        }
309    }
310}