Emoticons.java

  1/*
  2 * Copyright (c) 2017, Daniel Gultsch All rights reserved.
  3 *
  4 * Redistribution and use in source and binary forms, with or without modification,
  5 * are permitted provided that the following conditions are met:
  6 *
  7 * 1. Redistributions of source code must retain the above copyright notice, this
  8 * list of conditions and the following disclaimer.
  9 *
 10 * 2. Redistributions in binary form must reproduce the above copyright notice,
 11 * this list of conditions and the following disclaimer in the documentation and/or
 12 * other materials provided with the distribution.
 13 *
 14 * 3. Neither the name of the copyright holder nor the names of its contributors
 15 * may be used to endorse or promote products derived from this software without
 16 * specific prior written permission.
 17 *
 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 21 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 22 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 25 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 28 */
 29
 30package eu.siacs.conversations.utils;
 31
 32import android.util.LruCache;
 33
 34import androidx.annotation.NonNull;
 35
 36import com.google.common.collect.ImmutableSet;
 37
 38import java.util.ArrayList;
 39import java.util.Arrays;
 40import java.util.HashSet;
 41import java.util.List;
 42import java.util.Set;
 43import java.util.regex.Pattern;
 44
 45public class Emoticons {
 46
    // Inclusive code point ranges used to classify single code points while parsing.
    private static final UnicodeRange MISC_SYMBOLS_AND_PICTOGRAPHS =
            new UnicodeRange(0x1F300, 0x1F5FF);
    private static final UnicodeRange SUPPLEMENTAL_SYMBOLS = new UnicodeRange(0x1F900, 0x1F9FF);
    // Runs up to 0x1FAF6, so it also spans the commented-out TRANSPORT_SYMBOLS range below
    // and the SUPPLEMENTAL_SYMBOLS range above.
    private static final UnicodeRange EMOTICONS = new UnicodeRange(0x1F600, 0x1FAF6);
    // private static final UnicodeRange TRANSPORT_SYMBOLS = new UnicodeRange(0x1F680, 0x1F6FF);
    private static final UnicodeRange MISC_SYMBOLS = new UnicodeRange(0x2600, 0x26FF);
    private static final UnicodeRange DINGBATS = new UnicodeRange(0x2700, 0x27BF);
    private static final UnicodeRange ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
            new UnicodeRange(0x1F100, 0x1F1FF);
    private static final UnicodeRange ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
            new UnicodeRange(0x1F200, 0x1F2FF);
    // Sub-range of ENCLOSED_ALPHANUMERIC_SUPPLEMENT; Builder.offer accepts these in pairs.
    private static final UnicodeRange REGIONAL_INDICATORS = new UnicodeRange(0x1F1E6, 0x1F1FF);
    private static final UnicodeRange GEOMETRIC_SHAPES = new UnicodeRange(0x25A0, 0x25FF);
    private static final UnicodeRange LATIN_SUPPLEMENT = new UnicodeRange(0x80, 0xFF);
    private static final UnicodeRange MISC_TECHNICAL = new UnicodeRange(0x2300, 0x23FF);
    // Only accepted by Builder.offer when the sequence started with BLACK_FLAG.
    private static final UnicodeRange TAGS = new UnicodeRange(0xE0020, 0xE007F);
    // The two UnicodeList constants below each hold two individual code points, not a range.
    // NOTE(review): "CYK" is presumably a typo for "CJK" — confirm before renaming.
    private static final UnicodeList CYK_SYMBOLS_AND_PUNCTUATION = new UnicodeList(0x3030, 0x303D);
    private static final UnicodeList LETTER_LIKE_SYMBOLS = new UnicodeList(0x2122, 0x2139);

    // '#' (0x23), '*' (0x2A) and the ASCII digits '0'-'9' (0x30-0x39): the code points that
    // Builder.offer allows directly in front of COMBINING_ENCLOSING_KEY_CAP.
    private static final UnicodeBlocks KEY_CAP_COMBINABLE =
            new UnicodeBlocks(
                    new UnicodeList(0x23), new UnicodeList(0x2A), new UnicodeRange(0x30, 0x39));

    // Code points that may start a sequence but are only followed by VARIATION_16 (or the key
    // cap mark); Builder.build() returns Other when a sequence ends on one of them.
    private static final UnicodeBlocks SYMBOLIZE =
            new UnicodeBlocks(
                    GEOMETRIC_SHAPES,
                    LATIN_SUPPLEMENT,
                    CYK_SYMBOLS_AND_PUNCTUATION,
                    LETTER_LIKE_SYMBOLS,
                    KEY_CAP_COMBINABLE);
    // Code points that may start an emoji sequence (except FITZPATRICK members) or follow a ZWJ.
    private static final UnicodeBlocks EMOJIS =
            new UnicodeBlocks(
                    MISC_SYMBOLS_AND_PICTOGRAPHS,
                    SUPPLEMENTAL_SYMBOLS,
                    EMOTICONS,
                    // TRANSPORT_SYMBOLS,
                    MISC_SYMBOLS,
                    DINGBATS,
                    ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
                    ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
                    MISC_TECHNICAL);

    // generatePattern gives up and returns an empty pattern once it has seen this many emojis.
    private static final int MAX_EMOJIS = 42;

    // U+200D ZERO WIDTH JOINER.
    private static final int ZWJ = 0x200D;
    // U+FE0F VARIATION SELECTOR-16 and U+FE0E VARIATION SELECTOR-15.
    private static final int VARIATION_16 = 0xFE0F;
    private static final int VARIATION_15 = 0xFE0E;
    // Single-char String forms of the two selectors, used for endsWith checks and appending.
    private static final String VARIATION_16_STRING = new String(new char[] {VARIATION_16});
    private static final String VARIATION_15_STRING = new String(new char[] {VARIATION_15});
    // U+20E3 COMBINING ENCLOSING KEYCAP.
    private static final int COMBINING_ENCLOSING_KEY_CAP = 0x20E3;
    // A sequence starting with this code point is only extended by TAGS code points.
    private static final int BLACK_FLAG = 0x1F3F4;
    // Treated as mergers by Builder.isMerger; never allowed to start a sequence.
    private static final UnicodeRange FITZPATRICK = new UnicodeRange(0x1F3FB, 0x1F3FF);
 99
    // Symbols to which normalizeToVS16 appends VARIATION_16 when it is handed the bare form.
    // NOTE(review): most entries below read as empty string literals. Guava documents
    // ImmutableSet.of as dropping duplicates, so they collapse into one "" member, i.e. the
    // empty string is part of this set. Presumably these were literal glyphs lost to an
    // encoding problem — TODO confirm against the upstream file and restore them.
    private static final Set<String> TEXT_DEFAULT_TO_VS16 =
            ImmutableSet.of(
                    "",
                    "",
                    "",
                    "",
                    "",
                    "",
                    "",
                    "",
                    "\uD83C\uDF96",
                    "\uD83C\uDFC6",
                    "\uD83E\uDD47",
                    "\uD83E\uDD48",
                    "\uD83E\uDD49",
                    "\uD83D\uDC51",
                    "",
                    "",
                    "",
                    "",
                    "",
                    "",
                    "",
                    "",
                    "",
                    "",
                    "",
                    "",
                    "",
                    "",
                    "",
                    "",
                    "",
                    "",
                    "",
                    "");
136
    // Patterns produced by generatePattern, stored and looked up by getEmojiPattern.
    // The LruCache is constructed with a maximum size of 256.
    private static final LruCache<CharSequence, Pattern> CACHE = new LruCache<>(256);
138
139    public static String normalizeToVS16(final String input) {
140        return TEXT_DEFAULT_TO_VS16.contains(input) && !input.endsWith(VARIATION_15_STRING)
141                ? input + VARIATION_16_STRING
142                : input;
143    }
144
145    public static String existingVariant(final String original, final Set<String> existing) {
146        if (existing.contains(original) || original.endsWith(VARIATION_15_STRING)) {
147            return original;
148        }
149        final var variant =
150                original.endsWith(VARIATION_16_STRING)
151                        ? original.substring(0, original.length() - 1)
152                        : original + VARIATION_16_STRING;
153        return existing.contains(variant) ? variant : original;
154    }
155
156    private static List<Symbol> parse(String input) {
157        List<Symbol> symbols = new ArrayList<>();
158        Builder builder = new Builder();
159        boolean needsFinalBuild = false;
160        for (int cp, i = 0; i < input.length(); i += Character.charCount(cp)) {
161            cp = input.codePointAt(i);
162            if (builder.offer(cp)) {
163                needsFinalBuild = true;
164            } else {
165                symbols.add(builder.build());
166                builder = new Builder();
167                if (builder.offer(cp)) {
168                    needsFinalBuild = true;
169                }
170            }
171        }
172        if (needsFinalBuild) {
173            symbols.add(builder.build());
174        }
175        return symbols;
176    }
177
178    public static Pattern getEmojiPattern(final CharSequence input) {
179        Pattern pattern = CACHE.get(input);
180        if (pattern == null) {
181            pattern = generatePattern(input);
182            CACHE.put(input, pattern);
183        }
184        return pattern;
185    }
186
187    private static Pattern generatePattern(CharSequence input) {
188        final HashSet<String> emojis = new HashSet<>();
189        int i = 0;
190        for (final Symbol symbol : parse(input.toString())) {
191            if (symbol instanceof Emoji) {
192                emojis.add(symbol.toString());
193                if (++i >= MAX_EMOJIS) {
194                    return Pattern.compile("");
195                }
196            }
197        }
198        final StringBuilder pattern = new StringBuilder();
199        for (String emoji : emojis) {
200            if (pattern.length() != 0) {
201                pattern.append('|');
202            }
203            pattern.append(Pattern.quote(emoji));
204        }
205        return Pattern.compile(pattern.toString());
206    }
207
208    public static boolean isEmoji(String input) {
209        List<Symbol> symbols = parse(input);
210        return symbols.size() == 1 && symbols.get(0).isEmoji();
211    }
212
213    public static boolean isOnlyEmoji(String input) {
214        List<Symbol> symbols = parse(input);
215        for (Symbol symbol : symbols) {
216            if (!symbol.isEmoji()) {
217                return false;
218            }
219        }
220        return !symbols.isEmpty();
221    }
222
223    private abstract static class Symbol {
224
225        private final String value;
226
227        Symbol(List<Integer> codepoints) {
228            final StringBuilder builder = new StringBuilder();
229            for (final Integer codepoint : codepoints) {
230                builder.appendCodePoint(codepoint);
231            }
232            this.value = builder.toString();
233        }
234
235        abstract boolean isEmoji();
236
237        @NonNull
238        @Override
239        public String toString() {
240            return value;
241        }
242    }
243
244    public static class Emoji extends Symbol {
245
246        Emoji(List<Integer> codepoints) {
247            super(codepoints);
248        }
249
250        @Override
251        boolean isEmoji() {
252            return true;
253        }
254    }
255
256    public static class Other extends Symbol {
257
258        public Other(List<Integer> codepoints) {
259            super(codepoints);
260        }
261
262        @Override
263        boolean isEmoji() {
264            return false;
265        }
266    }
267
268    private static class Builder {
269        private final List<Integer> codepoints = new ArrayList<>();
270
271        public boolean offer(int codepoint) {
272            boolean add = false;
273            if (this.codepoints.isEmpty()) {
274                if (SYMBOLIZE.contains(codepoint)) {
275                    add = true;
276                } else if (REGIONAL_INDICATORS.contains(codepoint)) {
277                    add = true;
278                } else if (EMOJIS.contains(codepoint)
279                        && !FITZPATRICK.contains(codepoint)
280                        && codepoint != ZWJ) {
281                    add = true;
282                }
283            } else {
284                int previous = codepoints.get(codepoints.size() - 1);
285                if (codepoints.get(0) == BLACK_FLAG) {
286                    add = TAGS.contains(codepoint);
287                } else if (COMBINING_ENCLOSING_KEY_CAP == codepoint) {
288                    add = KEY_CAP_COMBINABLE.contains(previous) || previous == VARIATION_16;
289                } else if (SYMBOLIZE.contains(previous)) {
290                    add = codepoint == VARIATION_16;
291                } else if (REGIONAL_INDICATORS.contains(previous)
292                        && REGIONAL_INDICATORS.contains(codepoint)) {
293                    add = codepoints.size() == 1;
294                } else if (previous == VARIATION_16) {
295                    add = isMerger(codepoint) || codepoint == VARIATION_16;
296                } else if (FITZPATRICK.contains(previous)) {
297                    add = codepoint == ZWJ;
298                } else if (ZWJ == previous) {
299                    add = EMOJIS.contains(codepoint);
300                } else if (isMerger(codepoint)) {
301                    add = true;
302                } else if (codepoint == VARIATION_16 && EMOJIS.contains(previous)) {
303                    add = true;
304                }
305            }
306            if (add) {
307                codepoints.add(codepoint);
308                return true;
309            } else {
310                return false;
311            }
312        }
313
314        private static boolean isMerger(int codepoint) {
315            return codepoint == ZWJ || FITZPATRICK.contains(codepoint);
316        }
317
318        public Symbol build() {
319            if (!codepoints.isEmpty()
320                    && SYMBOLIZE.contains(codepoints.get(codepoints.size() - 1))) {
321                return new Other(codepoints);
322            } else if (codepoints.size() > 1
323                    && KEY_CAP_COMBINABLE.contains(codepoints.get(0))
324                    && codepoints.get(codepoints.size() - 1) != COMBINING_ENCLOSING_KEY_CAP) {
325                return new Other(codepoints);
326            }
327            return codepoints.isEmpty() ? new Other(codepoints) : new Emoji(codepoints);
328        }
329    }
330
331    public static class UnicodeBlocks implements UnicodeSet {
332        final UnicodeSet[] unicodeSets;
333
334        UnicodeBlocks(final UnicodeSet... sets) {
335            this.unicodeSets = sets;
336        }
337
338        @Override
339        public boolean contains(int codepoint) {
340            for (UnicodeSet unicodeSet : unicodeSets) {
341                if (unicodeSet.contains(codepoint)) {
342                    return true;
343                }
344            }
345            return false;
346        }
347    }
348
349    public interface UnicodeSet {
350        boolean contains(int codepoint);
351    }
352
353    public static class UnicodeList implements UnicodeSet {
354
355        private final List<Integer> list;
356
357        UnicodeList(final Integer... codes) {
358            this.list = Arrays.asList(codes);
359        }
360
361        @Override
362        public boolean contains(int codepoint) {
363            return this.list.contains(codepoint);
364        }
365    }
366
367    public static class UnicodeRange implements UnicodeSet {
368
369        private final int lower;
370        private final int upper;
371
372        UnicodeRange(int lower, int upper) {
373            this.lower = lower;
374            this.upper = upper;
375        }
376
377        public boolean contains(int codePoint) {
378            return codePoint >= lower && codePoint <= upper;
379        }
380    }
381}