From 71a56002fee0410de47075217ae7f600a0c5f813 Mon Sep 17 00:00:00 2001 From: Daniel Gultsch Date: Thu, 9 Jul 2020 17:43:30 +0200 Subject: [PATCH] fix emojis not rendering correctly with trailing variant selector. fixes #3819 --- .../siacs/conversations/utils/Emoticons.java | 455 +++++++++--------- 1 file changed, 228 insertions(+), 227 deletions(-) diff --git a/src/main/java/eu/siacs/conversations/utils/Emoticons.java b/src/main/java/eu/siacs/conversations/utils/Emoticons.java index 6abcf9b02..26dc205b4 100644 --- a/src/main/java/eu/siacs/conversations/utils/Emoticons.java +++ b/src/main/java/eu/siacs/conversations/utils/Emoticons.java @@ -29,280 +29,281 @@ package eu.siacs.conversations.utils; +import android.support.annotation.NonNull; import android.util.LruCache; import java.util.ArrayList; import java.util.Arrays; -import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.regex.Pattern; public class Emoticons { - private static final UnicodeRange MISC_SYMBOLS_AND_PICTOGRAPHS = new UnicodeRange(0x1F300,0x1F5FF); - private static final UnicodeRange SUPPLEMENTAL_SYMBOLS = new UnicodeRange(0x1F900,0x1F9FF); - private static final UnicodeRange EMOTICONS = new UnicodeRange(0x1F600,0x1F64F); - private static final UnicodeRange TRANSPORT_SYMBOLS = new UnicodeRange(0x1F680,0x1F6FF); - private static final UnicodeRange MISC_SYMBOLS = new UnicodeRange(0x2600,0x26FF); - private static final UnicodeRange DINGBATS = new UnicodeRange(0x2700,0x27BF); - private static final UnicodeRange ENCLOSED_ALPHANUMERIC_SUPPLEMENT = new UnicodeRange(0x1F100,0x1F1FF); - private static final UnicodeRange ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = new UnicodeRange(0x1F200,0x1F2FF); - private static final UnicodeRange REGIONAL_INDICATORS = new UnicodeRange(0x1F1E6,0x1F1FF); - private static final UnicodeRange GEOMETRIC_SHAPES = new UnicodeRange(0x25A0,0x25FF); - private static final UnicodeRange LATIN_SUPPLEMENT = new UnicodeRange(0x80,0xFF); - private static final UnicodeRange MISC_TECHNICAL = new UnicodeRange(0x2300,0x23FF); - private static final UnicodeRange TAGS = new UnicodeRange(0xE0020,0xE007F); - private static final UnicodeList CYK_SYMBOLS_AND_PUNCTUATION = new UnicodeList(0x3030,0x303D); - private static final UnicodeList LETTERLIKE_SYMBOLS = new UnicodeList(0x2122,0x2139); + private static final UnicodeRange MISC_SYMBOLS_AND_PICTOGRAPHS = new UnicodeRange(0x1F300, 0x1F5FF); + private static final UnicodeRange SUPPLEMENTAL_SYMBOLS = new UnicodeRange(0x1F900, 0x1F9FF); + private static final UnicodeRange EMOTICONS = new UnicodeRange(0x1F600, 0x1F64F); + private static final UnicodeRange TRANSPORT_SYMBOLS = new UnicodeRange(0x1F680, 0x1F6FF); + private static final UnicodeRange MISC_SYMBOLS = new UnicodeRange(0x2600, 0x26FF); + private static final UnicodeRange DINGBATS = new UnicodeRange(0x2700, 0x27BF); + private static final UnicodeRange ENCLOSED_ALPHANUMERIC_SUPPLEMENT = new UnicodeRange(0x1F100, 0x1F1FF); + private static final UnicodeRange ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = new UnicodeRange(0x1F200, 0x1F2FF); + private static final UnicodeRange REGIONAL_INDICATORS = new UnicodeRange(0x1F1E6, 0x1F1FF); + private static final UnicodeRange GEOMETRIC_SHAPES = new UnicodeRange(0x25A0, 0x25FF); + private static final UnicodeRange LATIN_SUPPLEMENT = new UnicodeRange(0x80, 0xFF); + private static final UnicodeRange MISC_TECHNICAL = new UnicodeRange(0x2300, 0x23FF); + private static final UnicodeRange TAGS = new UnicodeRange(0xE0020, 0xE007F); + private static final UnicodeList CYK_SYMBOLS_AND_PUNCTUATION = new UnicodeList(0x3030, 0x303D); + private static final UnicodeList LETTERLIKE_SYMBOLS = new UnicodeList(0x2122, 0x2139); - private static final UnicodeBlocks KEYCAP_COMBINEABLE = new UnicodeBlocks(new UnicodeList(0x23),new UnicodeList(0x2A),new UnicodeRange(0x30,0x39)); + private static final UnicodeBlocks KEYCAP_COMBINEABLE = new UnicodeBlocks(new UnicodeList(0x23), new UnicodeList(0x2A), new UnicodeRange(0x30, 0x39)); - private static final UnicodeBlocks SYMBOLIZE = new UnicodeBlocks( - GEOMETRIC_SHAPES, - LATIN_SUPPLEMENT, - CYK_SYMBOLS_AND_PUNCTUATION, - LETTERLIKE_SYMBOLS, - KEYCAP_COMBINEABLE); - private static final UnicodeBlocks EMOJIS = new UnicodeBlocks( - MISC_SYMBOLS_AND_PICTOGRAPHS, - SUPPLEMENTAL_SYMBOLS, - EMOTICONS, - TRANSPORT_SYMBOLS, - MISC_SYMBOLS, - DINGBATS, - ENCLOSED_ALPHANUMERIC_SUPPLEMENT, - ENCLOSED_IDEOGRAPHIC_SUPPLEMENT, - MISC_TECHNICAL); + private static final UnicodeBlocks SYMBOLIZE = new UnicodeBlocks( + GEOMETRIC_SHAPES, + LATIN_SUPPLEMENT, + CYK_SYMBOLS_AND_PUNCTUATION, + LETTERLIKE_SYMBOLS, + KEYCAP_COMBINEABLE); + private static final UnicodeBlocks EMOJIS = new UnicodeBlocks( + MISC_SYMBOLS_AND_PICTOGRAPHS, + SUPPLEMENTAL_SYMBOLS, + EMOTICONS, + TRANSPORT_SYMBOLS, + MISC_SYMBOLS, + DINGBATS, + ENCLOSED_ALPHANUMERIC_SUPPLEMENT, + ENCLOSED_IDEOGRAPHIC_SUPPLEMENT, + MISC_TECHNICAL); - private static final int MAX_EMOIJS = 42; + private static final int MAX_EMOIJS = 42; - private static final int ZWJ = 0x200D; - private static final int VARIATION_16 = 0xFE0F; - private static final int COMBINING_ENCLOSING_KEYCAP = 0x20E3; - private static final int BLACK_FLAG = 0x1F3F4; - private static final UnicodeRange FITZPATRICK = new UnicodeRange(0x1F3FB,0x1F3FF); + private static final int ZWJ = 0x200D; + private static final int VARIATION_16 = 0xFE0F; + private static final int COMBINING_ENCLOSING_KEYCAP = 0x20E3; + private static final int BLACK_FLAG = 0x1F3F4; + private static final UnicodeRange FITZPATRICK = new UnicodeRange(0x1F3FB, 0x1F3FF); - private static final LruCache CACHE = new LruCache<>(256); + private static final LruCache CACHE = new LruCache<>(256); - private static List parse(String input) { - List symbols = new ArrayList<>(); - Builder builder = new Builder(); - boolean needsFinalBuild = false; - for (int cp, i = 0; i < input.length(); i += Character.charCount(cp)) { - cp = input.codePointAt(i); - if (builder.offer(cp)) { - needsFinalBuild = true; - } else { - symbols.add(builder.build()); - builder = new Builder(); - if (builder.offer(cp)) { - needsFinalBuild = true; - } - } - } - if (needsFinalBuild) { - symbols.add(builder.build()); - } - return symbols; - } + private static List parse(String input) { + List symbols = new ArrayList<>(); + Builder builder = new Builder(); + boolean needsFinalBuild = false; + for (int cp, i = 0; i < input.length(); i += Character.charCount(cp)) { + cp = input.codePointAt(i); + if (builder.offer(cp)) { + needsFinalBuild = true; + } else { + symbols.add(builder.build()); + builder = new Builder(); + if (builder.offer(cp)) { + needsFinalBuild = true; + } + } + } + if (needsFinalBuild) { + symbols.add(builder.build()); + } + return symbols; + } - public static Pattern getEmojiPattern(CharSequence input) { - Pattern pattern = CACHE.get(input); - if (pattern == null) { - pattern = generatePattern(input); - CACHE.put(input, pattern); - } - return pattern; - } + public static Pattern getEmojiPattern(final CharSequence input) { + Pattern pattern = CACHE.get(input); + if (pattern == null) { + pattern = generatePattern(input); + CACHE.put(input, pattern); + } + return pattern; + } - private static Pattern generatePattern(CharSequence input) { - final HashSet emojis = new HashSet<>(); - int i = 0; - for(Symbol symbol : parse(input.toString())) { - if (symbol instanceof Emoji) { - emojis.add(symbol.toString()); - if (++i >= MAX_EMOIJS) { - return Pattern.compile(""); - } - } - } - final StringBuilder pattern = new StringBuilder(); - for(String emoji : emojis) { - if (pattern.length() != 0) { - pattern.append('|'); - } - pattern.append(Pattern.quote(emoji)); - } - return Pattern.compile(pattern.toString()); - } + private static Pattern generatePattern(CharSequence input) { + final HashSet emojis = new HashSet<>(); + int i = 0; + for (final Symbol symbol : parse(input.toString())) { + if (symbol instanceof Emoji) { + emojis.add(symbol.toString()); + if (++i >= MAX_EMOIJS) { + return Pattern.compile(""); + } + } + } + final StringBuilder pattern = new StringBuilder(); + for (String emoji : emojis) { + if (pattern.length() != 0) { + pattern.append('|'); + } + pattern.append(Pattern.quote(emoji)); + } + return Pattern.compile(pattern.toString()); + } - public static boolean isEmoji(String input) { - List symbols = parse(input); - return symbols.size() == 1 && symbols.get(0).isEmoji(); - } + public static boolean isEmoji(String input) { + List symbols = parse(input); + return symbols.size() == 1 && symbols.get(0).isEmoji(); + } - public static boolean isOnlyEmoji(String input) { - List symbols = parse(input); - for(Symbol symbol : symbols) { - if (!symbol.isEmoji()) { - return false; - } - } - return symbols.size() > 0; - } + public static boolean isOnlyEmoji(String input) { + List symbols = parse(input); + for (Symbol symbol : symbols) { + if (!symbol.isEmoji()) { + return false; + } + } + return symbols.size() > 0; + } - private static abstract class Symbol { + private static abstract class Symbol { - private final String value; + private final String value; - public Symbol(List codepoints) { - StringBuilder builder = new StringBuilder(); - for(Integer codepoint : codepoints) { - builder.appendCodePoint(codepoint); - } - this.value = builder.toString(); - } + Symbol(List codepoints) { + final StringBuilder builder = new StringBuilder(); + for (final Integer codepoint : codepoints) { + builder.appendCodePoint(codepoint); + } + this.value = builder.toString(); + } - abstract boolean isEmoji(); + abstract boolean isEmoji(); - @Override - public String toString() { - return value; - } - } + @NonNull + @Override + public String toString() { + return value; + } + } - public static class Emoji extends Symbol { + public static class Emoji extends Symbol { - public Emoji(List codepoints) { - super(codepoints); - } + Emoji(List codepoints) { + super(codepoints); + } - @Override - boolean isEmoji() { - return true; - } - } + @Override + boolean isEmoji() { + return true; + } + } - public static class Other extends Symbol { + public static class Other extends Symbol { - public Other(List codepoints) { - super(codepoints); - } + public Other(List codepoints) { + super(codepoints); + } - @Override - boolean isEmoji() { - return false; - } - } + @Override + boolean isEmoji() { + return false; + } + } - private static class Builder { - private final List codepoints = new ArrayList<>(); + private static class Builder { + private final List codepoints = new ArrayList<>(); - public boolean offer(int codepoint) { - boolean add = false; - if (this.codepoints.size() == 0) { - if (SYMBOLIZE.contains(codepoint)) { - add = true; - } else if (REGIONAL_INDICATORS.contains(codepoint)) { - add = true; - } else if (EMOJIS.contains(codepoint) && !FITZPATRICK.contains(codepoint) && codepoint != ZWJ) { - add = true; - } - } else { - int previous = codepoints.get(codepoints.size() -1); - if (codepoints.get(0) == BLACK_FLAG) { - add = TAGS.contains(codepoint); - } else if (COMBINING_ENCLOSING_KEYCAP == codepoint) { - add = KEYCAP_COMBINEABLE.contains(previous) || previous == VARIATION_16; - } else if (SYMBOLIZE.contains(previous)) { - add = codepoint == VARIATION_16; - } else if (REGIONAL_INDICATORS.contains(previous) && REGIONAL_INDICATORS.contains(codepoint)) { - add = codepoints.size() == 1; - } else if (previous == VARIATION_16) { - add = isMerger(codepoint); - } else if (FITZPATRICK.contains(previous)) { - add = codepoint == ZWJ; - } else if (ZWJ == previous) { - add = EMOJIS.contains(codepoint); - } else if (isMerger(codepoint)) { - add = true; - } else if (codepoint == VARIATION_16 && EMOJIS.contains(previous)) { - add = true; - } - } - if (add) { - codepoints.add(codepoint); - return true; - } else { - return false; - } - } + public boolean offer(int codepoint) { + boolean add = false; + if (this.codepoints.size() == 0) { + if (SYMBOLIZE.contains(codepoint)) { + add = true; + } else if (REGIONAL_INDICATORS.contains(codepoint)) { + add = true; + } else if (EMOJIS.contains(codepoint) && !FITZPATRICK.contains(codepoint) && codepoint != ZWJ) { + add = true; + } + } else { + int previous = codepoints.get(codepoints.size() - 1); + if (codepoints.get(0) == BLACK_FLAG) { + add = TAGS.contains(codepoint); + } else if (COMBINING_ENCLOSING_KEYCAP == codepoint) { + add = KEYCAP_COMBINEABLE.contains(previous) || previous == VARIATION_16; + } else if (SYMBOLIZE.contains(previous)) { + add = codepoint == VARIATION_16; + } else if (REGIONAL_INDICATORS.contains(previous) && REGIONAL_INDICATORS.contains(codepoint)) { + add = codepoints.size() == 1; + } else if (previous == VARIATION_16) { + add = isMerger(codepoint) || codepoint == VARIATION_16; + } else if (FITZPATRICK.contains(previous)) { + add = codepoint == ZWJ; + } else if (ZWJ == previous) { + add = EMOJIS.contains(codepoint); + } else if (isMerger(codepoint)) { + add = true; + } else if (codepoint == VARIATION_16 && EMOJIS.contains(previous)) { + add = true; + } + } + if (add) { + codepoints.add(codepoint); + return true; + } else { + return false; + } + } - private static boolean isMerger(int codepoint) { - return codepoint == ZWJ || FITZPATRICK.contains(codepoint); - } + private static boolean isMerger(int codepoint) { + return codepoint == ZWJ || FITZPATRICK.contains(codepoint); + } - public Symbol build() { - if (codepoints.size() > 0 && SYMBOLIZE.contains(codepoints.get(codepoints.size() - 1))) { - return new Other(codepoints); - } else if (codepoints.size() > 1 && KEYCAP_COMBINEABLE.contains(codepoints.get(0)) && codepoints.get(codepoints.size() - 1) != COMBINING_ENCLOSING_KEYCAP) { - return new Other(codepoints); - } - return codepoints.size() == 0 ? new Other(codepoints): new Emoji(codepoints); - } - } + public Symbol build() { + if (codepoints.size() > 0 && SYMBOLIZE.contains(codepoints.get(codepoints.size() - 1))) { + return new Other(codepoints); + } else if (codepoints.size() > 1 && KEYCAP_COMBINEABLE.contains(codepoints.get(0)) && codepoints.get(codepoints.size() - 1) != COMBINING_ENCLOSING_KEYCAP) { + return new Other(codepoints); + } + return codepoints.size() == 0 ? new Other(codepoints) : new Emoji(codepoints); + } + } - public static class UnicodeBlocks implements UnicodeSet { - final UnicodeSet[] unicodeSets; + public static class UnicodeBlocks implements UnicodeSet { + final UnicodeSet[] unicodeSets; - public UnicodeBlocks(UnicodeSet... sets) { - this.unicodeSets = sets; - } + UnicodeBlocks(final UnicodeSet... sets) { + this.unicodeSets = sets; + } - @Override - public boolean contains(int codepoint) { - for(UnicodeSet unicodeSet : unicodeSets) { - if (unicodeSet.contains(codepoint)) { - return true; - } - } - return false; - } - } + @Override + public boolean contains(int codepoint) { + for (UnicodeSet unicodeSet : unicodeSets) { + if (unicodeSet.contains(codepoint)) { + return true; + } + } + return false; + } + } - public interface UnicodeSet { - boolean contains(int codepoint); - } + public interface UnicodeSet { + boolean contains(int codepoint); + } - public static class UnicodeList implements UnicodeSet { + public static class UnicodeList implements UnicodeSet { - private final List list; + private final List list; - public UnicodeList(Integer... codes) { - this.list = Arrays.asList(codes); - } + UnicodeList(final Integer... codes) { + this.list = Arrays.asList(codes); + } - @Override - public boolean contains(int codepoint) { - return this.list.contains(codepoint); - } - } + @Override + public boolean contains(int codepoint) { + return this.list.contains(codepoint); + } + } - public static class UnicodeRange implements UnicodeSet { + public static class UnicodeRange implements UnicodeSet { - private final int lower; - private final int upper; + private final int lower; + private final int upper; - UnicodeRange(int lower, int upper) { - this.lower = lower; - this.upper = upper; - } + UnicodeRange(int lower, int upper) { + this.lower = lower; + this.upper = upper; + } - public boolean contains(int codePoint) { - return codePoint >= lower && codePoint <= upper; - } - } + public boolean contains(int codePoint) { + return codePoint >= lower && codePoint <= upper; + } + } }