IrregularUnicodeDetector.java

  1/*
  2 * Copyright (c) 2018-2019, Daniel Gultsch All rights reserved.
  3 *
  4 * Redistribution and use in source and binary forms, with or without modification,
  5 * are permitted provided that the following conditions are met:
  6 *
  7 * 1. Redistributions of source code must retain the above copyright notice, this
  8 * list of conditions and the following disclaimer.
  9 *
 10 * 2. Redistributions in binary form must reproduce the above copyright notice,
 11 * this list of conditions and the following disclaimer in the documentation and/or
 12 * other materials provided with the distribution.
 13 *
 14 * 3. Neither the name of the copyright holder nor the names of its contributors
 15 * may be used to endorse or promote products derived from this software without
 16 * specific prior written permission.
 17 *
 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 21 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 22 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 25 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 28 */
 29
 30package eu.siacs.conversations.utils;
 31
 32import android.annotation.TargetApi;
 33import android.content.Context;
 34import android.os.Build;
 35import android.text.Spannable;
 36import android.text.SpannableString;
 37import android.text.SpannableStringBuilder;
 38import android.text.style.ForegroundColorSpan;
 39import android.util.LruCache;
 40import androidx.annotation.ColorInt;
 41import com.google.android.material.color.MaterialColors;
 42import eu.siacs.conversations.xmpp.Jid;
 43import java.util.ArrayList;
 44import java.util.Arrays;
 45import java.util.Collection;
 46import java.util.Collections;
 47import java.util.HashMap;
 48import java.util.HashSet;
 49import java.util.List;
 50import java.util.Map;
 51import java.util.Set;
 52import java.util.regex.Matcher;
 53import java.util.regex.Pattern;
 54
 55public class IrregularUnicodeDetector {
 56
 57    private static final Map<Character.UnicodeBlock, Character.UnicodeBlock> NORMALIZATION_MAP;
 58    private static final LruCache<Jid, PatternTuple> CACHE = new LruCache<>(4096);
 59    private static final List<String> AMBIGUOUS_CYRILLIC =
 60            Arrays.asList("а", "г", "е", "ѕ", "і", "ј", "ķ", "ԛ", "о", "р", "с", "у", "х");
 61
 62    static {
 63        Map<Character.UnicodeBlock, Character.UnicodeBlock> temp = new HashMap<>();
 64        temp.put(Character.UnicodeBlock.LATIN_1_SUPPLEMENT, Character.UnicodeBlock.BASIC_LATIN);
 65        NORMALIZATION_MAP = Collections.unmodifiableMap(temp);
 66    }
 67
 68    private static Character.UnicodeBlock normalize(Character.UnicodeBlock in) {
 69        if (NORMALIZATION_MAP.containsKey(in)) {
 70            return NORMALIZATION_MAP.get(in);
 71        } else {
 72            return in;
 73        }
 74    }
 75
 76    public static Spannable style(final Context context, Jid jid) {
 77        return style(
 78                jid,
 79                MaterialColors.getColor(
 80                        context,
 81                        com.google.android.material.R.attr.colorError,
 82                        "colorError not found"));
 83    }
 84
 85    private static Spannable style(Jid jid, @ColorInt int color) {
 86        PatternTuple patternTuple = find(jid);
 87        SpannableStringBuilder builder = new SpannableStringBuilder();
 88        if (jid.getLocal() != null && patternTuple.local != null) {
 89            SpannableString local = new SpannableString(jid.getLocal());
 90            colorize(local, patternTuple.local, color);
 91            builder.append(local);
 92            builder.append('@');
 93        }
 94        if (jid.getDomain() != null) {
 95            String[] labels = jid.getDomain().toString().split("\\.");
 96            for (int i = 0; i < labels.length; ++i) {
 97                SpannableString spannableString = new SpannableString(labels[i]);
 98                colorize(spannableString, patternTuple.domain.get(i), color);
 99                if (i != 0) {
100                    builder.append('.');
101                }
102                builder.append(spannableString);
103            }
104        }
105        if (builder.length() != 0 && jid.getResource() != null) {
106            builder.append('/');
107            builder.append(jid.getResource());
108        }
109        return builder;
110    }
111
112    private static void colorize(
113            SpannableString spannableString, Pattern pattern, @ColorInt int color) {
114        Matcher matcher = pattern.matcher(spannableString);
115        while (matcher.find()) {
116            if (matcher.start() < matcher.end()) {
117                spannableString.setSpan(
118                        new ForegroundColorSpan(color),
119                        matcher.start(),
120                        matcher.end(),
121                        Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
122            }
123        }
124    }
125
126    private static Map<Character.UnicodeBlock, List<String>> mapCompat(String word) {
127        Map<Character.UnicodeBlock, List<String>> map = new HashMap<>();
128        final int length = word.length();
129        for (int offset = 0; offset < length; ) {
130            final int codePoint = word.codePointAt(offset);
131            offset += Character.charCount(codePoint);
132            if (!Character.isLetter(codePoint)) {
133                continue;
134            }
135            Character.UnicodeBlock block = normalize(Character.UnicodeBlock.of(codePoint));
136            List<String> codePoints;
137            if (map.containsKey(block)) {
138                codePoints = map.get(block);
139            } else {
140                codePoints = new ArrayList<>();
141                map.put(block, codePoints);
142            }
143            codePoints.add(String.copyValueOf(Character.toChars(codePoint)));
144        }
145        return map;
146    }
147
148    @TargetApi(Build.VERSION_CODES.N)
149    private static Map<Character.UnicodeScript, List<String>> map(String word) {
150        Map<Character.UnicodeScript, List<String>> map = new HashMap<>();
151        final int length = word.length();
152        for (int offset = 0; offset < length; ) {
153            final int codePoint = word.codePointAt(offset);
154            Character.UnicodeScript script = Character.UnicodeScript.of(codePoint);
155            if (script != Character.UnicodeScript.COMMON) {
156                List<String> codePoints;
157                if (map.containsKey(script)) {
158                    codePoints = map.get(script);
159                } else {
160                    codePoints = new ArrayList<>();
161                    map.put(script, codePoints);
162                }
163                codePoints.add(String.copyValueOf(Character.toChars(codePoint)));
164            }
165            offset += Character.charCount(codePoint);
166        }
167        return map;
168    }
169
170    private static Set<String> eliminateFirstAndGetCodePointsCompat(
171            Map<Character.UnicodeBlock, List<String>> map) {
172        return eliminateFirstAndGetCodePoints(map, Character.UnicodeBlock.BASIC_LATIN);
173    }
174
175    @TargetApi(Build.VERSION_CODES.N)
176    private static Set<String> eliminateFirstAndGetCodePoints(
177            Map<Character.UnicodeScript, List<String>> map) {
178        return eliminateFirstAndGetCodePoints(map, Character.UnicodeScript.COMMON);
179    }
180
181    private static <T> Set<String> eliminateFirstAndGetCodePoints(
182            Map<T, List<String>> map, T defaultPick) {
183        T pick = defaultPick;
184        int size = 0;
185        for (Map.Entry<T, List<String>> entry : map.entrySet()) {
186            if (entry.getValue().size() > size) {
187                size = entry.getValue().size();
188                pick = entry.getKey();
189            }
190        }
191        map.remove(pick);
192        Set<String> all = new HashSet<>();
193        for (List<String> codePoints : map.values()) {
194            all.addAll(codePoints);
195        }
196        return all;
197    }
198
199    private static Set<String> findIrregularCodePoints(String word) {
200        Set<String> codePoints;
201        if (Build.VERSION.SDK_INT < Build.VERSION_CODES.N) {
202            final Map<Character.UnicodeBlock, List<String>> map = mapCompat(word);
203            final Set<String> set = asSet(map);
204            if (containsOnlyAmbiguousCyrillic(set)) {
205                return set;
206            }
207            codePoints = eliminateFirstAndGetCodePointsCompat(map);
208        } else {
209            final Map<Character.UnicodeScript, List<String>> map = map(word);
210            final Set<String> set = asSet(map);
211            if (containsOnlyAmbiguousCyrillic(set)) {
212                return set;
213            }
214            codePoints = eliminateFirstAndGetCodePoints(map);
215        }
216        return codePoints;
217    }
218
219    private static Set<String> asSet(Map<?, List<String>> map) {
220        final Set<String> flat = new HashSet<>();
221        for (List<String> value : map.values()) {
222            flat.addAll(value);
223        }
224        return flat;
225    }
226
227    private static boolean containsOnlyAmbiguousCyrillic(Collection<String> codePoints) {
228        for (String codePoint : codePoints) {
229            if (!AMBIGUOUS_CYRILLIC.contains(codePoint)) {
230                return false;
231            }
232        }
233        return true;
234    }
235
236    private static PatternTuple find(Jid jid) {
237        synchronized (CACHE) {
238            PatternTuple pattern = CACHE.get(jid);
239            if (pattern != null) {
240                return pattern;
241            }
242            pattern = PatternTuple.of(jid);
243            CACHE.put(jid, pattern);
244            return pattern;
245        }
246    }
247
248    private static Pattern create(Set<String> codePoints) {
249        final StringBuilder pattern = new StringBuilder();
250        for (String codePoint : codePoints) {
251            if (pattern.length() != 0) {
252                pattern.append('|');
253            }
254            pattern.append(Pattern.quote(codePoint));
255        }
256        return Pattern.compile(pattern.toString());
257    }
258
259    private static class PatternTuple {
260        private final Pattern local;
261        private final List<Pattern> domain;
262
263        private PatternTuple(Pattern local, List<Pattern> domain) {
264            this.local = local;
265            this.domain = domain;
266        }
267
268        private static PatternTuple of(Jid jid) {
269            final Pattern localPattern;
270            if (jid.getLocal() != null) {
271                localPattern = create(findIrregularCodePoints(jid.getLocal()));
272            } else {
273                localPattern = null;
274            }
275            String domain = jid.getDomain().toString();
276            final List<Pattern> domainPatterns = new ArrayList<>();
277            if (domain != null) {
278                for (String label : domain.split("\\.")) {
279                    domainPatterns.add(create(findIrregularCodePoints(label)));
280                }
281            }
282            return new PatternTuple(localPattern, domainPatterns);
283        }
284    }
285}