Fix emojis not rendering correctly with a trailing variation selector (U+FE0F). Fixes #3819

Daniel Gultsch created

Change summary

src/main/java/eu/siacs/conversations/utils/Emoticons.java | 533 ++++----
1 file changed, 267 insertions(+), 266 deletions(-)

Detailed changes

src/main/java/eu/siacs/conversations/utils/Emoticons.java 🔗

@@ -29,280 +29,281 @@
 
 package eu.siacs.conversations.utils;
 
+import android.support.annotation.NonNull;
 import android.util.LruCache;
 
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 import java.util.regex.Pattern;
 
 public class Emoticons {
 
-	private static final UnicodeRange MISC_SYMBOLS_AND_PICTOGRAPHS = new UnicodeRange(0x1F300,0x1F5FF);
-	private static final UnicodeRange SUPPLEMENTAL_SYMBOLS = new UnicodeRange(0x1F900,0x1F9FF);
-	private static final UnicodeRange EMOTICONS = new UnicodeRange(0x1F600,0x1F64F);
-	private static final UnicodeRange TRANSPORT_SYMBOLS = new UnicodeRange(0x1F680,0x1F6FF);
-	private static final UnicodeRange MISC_SYMBOLS = new UnicodeRange(0x2600,0x26FF);
-	private static final UnicodeRange DINGBATS = new UnicodeRange(0x2700,0x27BF);
-	private static final UnicodeRange ENCLOSED_ALPHANUMERIC_SUPPLEMENT = new UnicodeRange(0x1F100,0x1F1FF);
-	private static final UnicodeRange ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = new UnicodeRange(0x1F200,0x1F2FF);
-	private static final UnicodeRange REGIONAL_INDICATORS = new UnicodeRange(0x1F1E6,0x1F1FF);
-	private static final UnicodeRange GEOMETRIC_SHAPES = new UnicodeRange(0x25A0,0x25FF);
-	private static final UnicodeRange LATIN_SUPPLEMENT = new UnicodeRange(0x80,0xFF);
-	private static final UnicodeRange MISC_TECHNICAL = new UnicodeRange(0x2300,0x23FF);
-	private static final UnicodeRange TAGS = new UnicodeRange(0xE0020,0xE007F);
-	private static final UnicodeList CYK_SYMBOLS_AND_PUNCTUATION = new UnicodeList(0x3030,0x303D);
-	private static final UnicodeList LETTERLIKE_SYMBOLS = new UnicodeList(0x2122,0x2139);
-
-	private static final UnicodeBlocks KEYCAP_COMBINEABLE = new UnicodeBlocks(new UnicodeList(0x23),new UnicodeList(0x2A),new UnicodeRange(0x30,0x39));
-
-	private static final UnicodeBlocks SYMBOLIZE = new UnicodeBlocks(
-			GEOMETRIC_SHAPES,
-			LATIN_SUPPLEMENT,
-			CYK_SYMBOLS_AND_PUNCTUATION,
-			LETTERLIKE_SYMBOLS,
-			KEYCAP_COMBINEABLE);
-	private static final UnicodeBlocks EMOJIS = new UnicodeBlocks(
-			MISC_SYMBOLS_AND_PICTOGRAPHS,
-			SUPPLEMENTAL_SYMBOLS,
-			EMOTICONS,
-			TRANSPORT_SYMBOLS,
-			MISC_SYMBOLS,
-			DINGBATS,
-			ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
-			ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
-			MISC_TECHNICAL);
-
-	private static final int MAX_EMOIJS = 42;
-
-	private static final int ZWJ = 0x200D;
-	private static final int VARIATION_16 = 0xFE0F;
-	private static final int COMBINING_ENCLOSING_KEYCAP = 0x20E3;
-	private static final int BLACK_FLAG = 0x1F3F4;
-	private static final UnicodeRange FITZPATRICK = new UnicodeRange(0x1F3FB,0x1F3FF);
-
-	private static final LruCache<CharSequence,Pattern> CACHE = new LruCache<>(256);
-
-	private static List<Symbol> parse(String input) {
-		List<Symbol> symbols = new ArrayList<>();
-		Builder builder = new Builder();
-		boolean needsFinalBuild = false;
-		for (int cp, i = 0; i < input.length(); i += Character.charCount(cp)) {
-			cp = input.codePointAt(i);
-			if (builder.offer(cp)) {
-				needsFinalBuild = true;
-			} else {
-				symbols.add(builder.build());
-				builder = new Builder();
-				if (builder.offer(cp)) {
-					needsFinalBuild = true;
-				}
-			}
-		}
-		if (needsFinalBuild) {
-			symbols.add(builder.build());
-		}
-		return symbols;
-	}
-
-	public static Pattern getEmojiPattern(CharSequence input) {
-		Pattern pattern = CACHE.get(input);
-		if (pattern == null) {
-			pattern = generatePattern(input);
-			CACHE.put(input, pattern);
-		}
-		return pattern;
-	}
-
-	private static Pattern generatePattern(CharSequence input) {
-		final HashSet<String> emojis = new HashSet<>();
-		int i = 0;
-		for(Symbol symbol : parse(input.toString())) {
-			if (symbol instanceof Emoji) {
-				emojis.add(symbol.toString());
-				if (++i >= MAX_EMOIJS) {
-					return Pattern.compile("");
-				}
-			}
-		}
-		final StringBuilder pattern = new StringBuilder();
-		for(String emoji : emojis) {
-			if (pattern.length() != 0) {
-				pattern.append('|');
-			}
-			pattern.append(Pattern.quote(emoji));
-		}
-		return Pattern.compile(pattern.toString());
-	}
-
-	public static boolean isEmoji(String input) {
-		List<Symbol> symbols = parse(input);
-		return symbols.size() == 1 && symbols.get(0).isEmoji();
-	}
-
-	public static boolean isOnlyEmoji(String input) {
-		List<Symbol> symbols = parse(input);
-		for(Symbol symbol : symbols) {
-			if (!symbol.isEmoji()) {
-				return false;
-			}
-		}
-		return symbols.size() > 0;
-	}
-
-	private static abstract class Symbol {
-
-		private final String value;
-
-		public Symbol(List<Integer> codepoints) {
-			StringBuilder builder = new StringBuilder();
-			for(Integer codepoint : codepoints) {
-				builder.appendCodePoint(codepoint);
-			}
-			this.value = builder.toString();
-		}
-
-		abstract boolean isEmoji();
-
-		@Override
-		public String toString() {
-			return value;
-		}
-	}
-
-	public static class Emoji extends Symbol {
-
-		public Emoji(List<Integer> codepoints) {
-			super(codepoints);
-		}
-
-		@Override
-		boolean isEmoji() {
-			return true;
-		}
-	}
-
-	public static class Other extends Symbol {
-
-		public Other(List<Integer> codepoints) {
-			super(codepoints);
-		}
-
-		@Override
-		boolean isEmoji() {
-			return false;
-		}
-	}
-
-	private static class Builder {
-		private final List<Integer> codepoints = new ArrayList<>();
-
-
-		public boolean offer(int codepoint) {
-			boolean add = false;
-			if (this.codepoints.size() == 0) {
-				if (SYMBOLIZE.contains(codepoint)) {
-					add = true;
-				} else if (REGIONAL_INDICATORS.contains(codepoint)) {
-					add = true;
-				} else if (EMOJIS.contains(codepoint) && !FITZPATRICK.contains(codepoint) && codepoint != ZWJ) {
-					add = true;
-				}
-			} else {
-				int previous = codepoints.get(codepoints.size() -1);
-				if (codepoints.get(0) == BLACK_FLAG) {
-					add = TAGS.contains(codepoint);
-				} else if (COMBINING_ENCLOSING_KEYCAP == codepoint) {
-					add = KEYCAP_COMBINEABLE.contains(previous) || previous == VARIATION_16;
-				} else if (SYMBOLIZE.contains(previous)) {
-					add = codepoint == VARIATION_16;
-				} else if (REGIONAL_INDICATORS.contains(previous) && REGIONAL_INDICATORS.contains(codepoint)) {
-					add = codepoints.size() == 1;
-				} else if (previous == VARIATION_16) {
-					add = isMerger(codepoint);
-				} else if (FITZPATRICK.contains(previous)) {
-					add = codepoint == ZWJ;
-				} else if (ZWJ == previous) {
-					add = EMOJIS.contains(codepoint);
-				} else if (isMerger(codepoint)) {
-					add = true;
-				} else if (codepoint == VARIATION_16 && EMOJIS.contains(previous)) {
-					add = true;
-				}
-			}
-			if (add) {
-				codepoints.add(codepoint);
-				return true;
-			} else {
-				return false;
-			}
-		}
-
-		private static boolean isMerger(int codepoint) {
-			return codepoint == ZWJ || FITZPATRICK.contains(codepoint);
-		}
-
-		public Symbol build() {
-			if (codepoints.size() > 0 && SYMBOLIZE.contains(codepoints.get(codepoints.size() - 1))) {
-				return new Other(codepoints);
-			} else if (codepoints.size() > 1 && KEYCAP_COMBINEABLE.contains(codepoints.get(0)) && codepoints.get(codepoints.size() - 1) != COMBINING_ENCLOSING_KEYCAP) {
-				return new Other(codepoints);
-			}
-			return codepoints.size() == 0 ? new Other(codepoints): new Emoji(codepoints);
-		}
-	}
-
-	public static class UnicodeBlocks implements UnicodeSet {
-		final UnicodeSet[] unicodeSets;
-
-		public UnicodeBlocks(UnicodeSet... sets) {
-			this.unicodeSets = sets;
-		}
-
-		@Override
-		public boolean contains(int codepoint) {
-			for(UnicodeSet unicodeSet : unicodeSets) {
-				if (unicodeSet.contains(codepoint)) {
-					return true;
-				}
-			}
-			return false;
-		}
-	}
-
-	public interface UnicodeSet {
-		boolean contains(int codepoint);
-	}
-
-	public static class UnicodeList implements UnicodeSet {
-
-		private final List<Integer> list;
-
-		public UnicodeList(Integer... codes) {
-			this.list = Arrays.asList(codes);
-		}
-
-		@Override
-		public boolean contains(int codepoint) {
-			return this.list.contains(codepoint);
-		}
-	}
-
-
-	public static class UnicodeRange implements UnicodeSet {
-
-		private final int lower;
-		private final int upper;
-
-		UnicodeRange(int lower, int upper) {
-			this.lower = lower;
-			this.upper = upper;
-		}
-
-		public boolean contains(int codePoint) {
-			return codePoint >= lower && codePoint <= upper;
-		}
-	}
+    private static final UnicodeRange MISC_SYMBOLS_AND_PICTOGRAPHS = new UnicodeRange(0x1F300, 0x1F5FF);
+    private static final UnicodeRange SUPPLEMENTAL_SYMBOLS = new UnicodeRange(0x1F900, 0x1F9FF);
+    private static final UnicodeRange EMOTICONS = new UnicodeRange(0x1F600, 0x1F64F);
+    private static final UnicodeRange TRANSPORT_SYMBOLS = new UnicodeRange(0x1F680, 0x1F6FF);
+    private static final UnicodeRange MISC_SYMBOLS = new UnicodeRange(0x2600, 0x26FF);
+    private static final UnicodeRange DINGBATS = new UnicodeRange(0x2700, 0x27BF);
+    private static final UnicodeRange ENCLOSED_ALPHANUMERIC_SUPPLEMENT = new UnicodeRange(0x1F100, 0x1F1FF);
+    private static final UnicodeRange ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = new UnicodeRange(0x1F200, 0x1F2FF);
+    private static final UnicodeRange REGIONAL_INDICATORS = new UnicodeRange(0x1F1E6, 0x1F1FF);
+    private static final UnicodeRange GEOMETRIC_SHAPES = new UnicodeRange(0x25A0, 0x25FF);
+    private static final UnicodeRange LATIN_SUPPLEMENT = new UnicodeRange(0x80, 0xFF);
+    private static final UnicodeRange MISC_TECHNICAL = new UnicodeRange(0x2300, 0x23FF);
+    private static final UnicodeRange TAGS = new UnicodeRange(0xE0020, 0xE007F);
+    private static final UnicodeList CYK_SYMBOLS_AND_PUNCTUATION = new UnicodeList(0x3030, 0x303D);
+    private static final UnicodeList LETTERLIKE_SYMBOLS = new UnicodeList(0x2122, 0x2139);
+
+    private static final UnicodeBlocks KEYCAP_COMBINEABLE = new UnicodeBlocks(new UnicodeList(0x23), new UnicodeList(0x2A), new UnicodeRange(0x30, 0x39));
+
+    private static final UnicodeBlocks SYMBOLIZE = new UnicodeBlocks(
+            GEOMETRIC_SHAPES,
+            LATIN_SUPPLEMENT,
+            CYK_SYMBOLS_AND_PUNCTUATION,
+            LETTERLIKE_SYMBOLS,
+            KEYCAP_COMBINEABLE);
+    private static final UnicodeBlocks EMOJIS = new UnicodeBlocks(
+            MISC_SYMBOLS_AND_PICTOGRAPHS,
+            SUPPLEMENTAL_SYMBOLS,
+            EMOTICONS,
+            TRANSPORT_SYMBOLS,
+            MISC_SYMBOLS,
+            DINGBATS,
+            ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
+            ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
+            MISC_TECHNICAL);
+
+    private static final int MAX_EMOIJS = 42;
+
+    private static final int ZWJ = 0x200D;
+    private static final int VARIATION_16 = 0xFE0F;
+    private static final int COMBINING_ENCLOSING_KEYCAP = 0x20E3;
+    private static final int BLACK_FLAG = 0x1F3F4;
+    private static final UnicodeRange FITZPATRICK = new UnicodeRange(0x1F3FB, 0x1F3FF);
+
+    private static final LruCache<CharSequence, Pattern> CACHE = new LruCache<>(256);
+
+    private static List<Symbol> parse(String input) {
+        List<Symbol> symbols = new ArrayList<>();
+        Builder builder = new Builder();
+        boolean needsFinalBuild = false;
+        for (int cp, i = 0; i < input.length(); i += Character.charCount(cp)) {
+            cp = input.codePointAt(i);
+            if (builder.offer(cp)) {
+                needsFinalBuild = true;
+            } else {
+                symbols.add(builder.build());
+                builder = new Builder();
+                if (builder.offer(cp)) {
+                    needsFinalBuild = true;
+                }
+            }
+        }
+        if (needsFinalBuild) {
+            symbols.add(builder.build());
+        }
+        return symbols;
+    }
+
+    public static Pattern getEmojiPattern(final CharSequence input) {
+        Pattern pattern = CACHE.get(input);
+        if (pattern == null) {
+            pattern = generatePattern(input);
+            CACHE.put(input, pattern);
+        }
+        return pattern;
+    }
+
+    private static Pattern generatePattern(CharSequence input) {
+        final HashSet<String> emojis = new HashSet<>();
+        int i = 0;
+        for (final Symbol symbol : parse(input.toString())) {
+            if (symbol instanceof Emoji) {
+                emojis.add(symbol.toString());
+                if (++i >= MAX_EMOIJS) {
+                    return Pattern.compile("");
+                }
+            }
+        }
+        final StringBuilder pattern = new StringBuilder();
+        for (String emoji : emojis) {
+            if (pattern.length() != 0) {
+                pattern.append('|');
+            }
+            pattern.append(Pattern.quote(emoji));
+        }
+        return Pattern.compile(pattern.toString());
+    }
+
+    public static boolean isEmoji(String input) {
+        List<Symbol> symbols = parse(input);
+        return symbols.size() == 1 && symbols.get(0).isEmoji();
+    }
+
+    public static boolean isOnlyEmoji(String input) {
+        List<Symbol> symbols = parse(input);
+        for (Symbol symbol : symbols) {
+            if (!symbol.isEmoji()) {
+                return false;
+            }
+        }
+        return symbols.size() > 0;
+    }
+
+    private static abstract class Symbol {
+
+        private final String value;
+
+        Symbol(List<Integer> codepoints) {
+            final StringBuilder builder = new StringBuilder();
+            for (final Integer codepoint : codepoints) {
+                builder.appendCodePoint(codepoint);
+            }
+            this.value = builder.toString();
+        }
+
+        abstract boolean isEmoji();
+
+        @NonNull
+        @Override
+        public String toString() {
+            return value;
+        }
+    }
+
+    public static class Emoji extends Symbol {
+
+        Emoji(List<Integer> codepoints) {
+            super(codepoints);
+        }
+
+        @Override
+        boolean isEmoji() {
+            return true;
+        }
+    }
+
+    public static class Other extends Symbol {
+
+        public Other(List<Integer> codepoints) {
+            super(codepoints);
+        }
+
+        @Override
+        boolean isEmoji() {
+            return false;
+        }
+    }
+
+    private static class Builder {
+        private final List<Integer> codepoints = new ArrayList<>();
+
+
+        public boolean offer(int codepoint) {
+            boolean add = false;
+            if (this.codepoints.size() == 0) {
+                if (SYMBOLIZE.contains(codepoint)) {
+                    add = true;
+                } else if (REGIONAL_INDICATORS.contains(codepoint)) {
+                    add = true;
+                } else if (EMOJIS.contains(codepoint) && !FITZPATRICK.contains(codepoint) && codepoint != ZWJ) {
+                    add = true;
+                }
+            } else {
+                int previous = codepoints.get(codepoints.size() - 1);
+                if (codepoints.get(0) == BLACK_FLAG) {
+                    add = TAGS.contains(codepoint);
+                } else if (COMBINING_ENCLOSING_KEYCAP == codepoint) {
+                    add = KEYCAP_COMBINEABLE.contains(previous) || previous == VARIATION_16;
+                } else if (SYMBOLIZE.contains(previous)) {
+                    add = codepoint == VARIATION_16;
+                } else if (REGIONAL_INDICATORS.contains(previous) && REGIONAL_INDICATORS.contains(codepoint)) {
+                    add = codepoints.size() == 1;
+                } else if (previous == VARIATION_16) {
+                    add = isMerger(codepoint) || codepoint == VARIATION_16;
+                } else if (FITZPATRICK.contains(previous)) {
+                    add = codepoint == ZWJ;
+                } else if (ZWJ == previous) {
+                    add = EMOJIS.contains(codepoint);
+                } else if (isMerger(codepoint)) {
+                    add = true;
+                } else if (codepoint == VARIATION_16 && EMOJIS.contains(previous)) {
+                    add = true;
+                }
+            }
+            if (add) {
+                codepoints.add(codepoint);
+                return true;
+            } else {
+                return false;
+            }
+        }
+
+        private static boolean isMerger(int codepoint) {
+            return codepoint == ZWJ || FITZPATRICK.contains(codepoint);
+        }
+
+        public Symbol build() {
+            if (codepoints.size() > 0 && SYMBOLIZE.contains(codepoints.get(codepoints.size() - 1))) {
+                return new Other(codepoints);
+            } else if (codepoints.size() > 1 && KEYCAP_COMBINEABLE.contains(codepoints.get(0)) && codepoints.get(codepoints.size() - 1) != COMBINING_ENCLOSING_KEYCAP) {
+                return new Other(codepoints);
+            }
+            return codepoints.size() == 0 ? new Other(codepoints) : new Emoji(codepoints);
+        }
+    }
+
+    public static class UnicodeBlocks implements UnicodeSet {
+        final UnicodeSet[] unicodeSets;
+
+        UnicodeBlocks(final UnicodeSet... sets) {
+            this.unicodeSets = sets;
+        }
+
+        @Override
+        public boolean contains(int codepoint) {
+            for (UnicodeSet unicodeSet : unicodeSets) {
+                if (unicodeSet.contains(codepoint)) {
+                    return true;
+                }
+            }
+            return false;
+        }
+    }
+
+    public interface UnicodeSet {
+        boolean contains(int codepoint);
+    }
+
+    public static class UnicodeList implements UnicodeSet {
+
+        private final List<Integer> list;
+
+        UnicodeList(final Integer... codes) {
+            this.list = Arrays.asList(codes);
+        }
+
+        @Override
+        public boolean contains(int codepoint) {
+            return this.list.contains(codepoint);
+        }
+    }
+
+
+    public static class UnicodeRange implements UnicodeSet {
+
+        private final int lower;
+        private final int upper;
+
+        UnicodeRange(int lower, int upper) {
+            this.lower = lower;
+            this.upper = upper;
+        }
+
+        public boolean contains(int codePoint) {
+            return codePoint >= lower && codePoint <= upper;
+        }
+    }
 }