// blob: 7db447df2d105ae883bb76b26c7b47eb3d57e9c6 [file] [log] [blame]
package org.unicode.cldr.util;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import org.unicode.cldr.draft.FileUtilities;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableSet;
import com.ibm.icu.dev.util.UnicodeMap;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.lang.CharSequences;
import com.ibm.icu.text.UnicodeSet;
/**
 * Emoji category data and annotation paths.
 *
 * <p>All tables are filled once, during class initialization, from the resource
 * {@code data/emoji/emoji-test.txt} and are frozen afterwards.
 */
public class Emoji {
    /** U+FE0F; stripped from every sequence before the sequence is classified. */
    public static final String EMOJI_VARIANT = "\uFE0F";
    /** U+20E3; any sequence containing it is treated as constructed. */
    public static final String COMBINING_ENCLOSING_KEYCAP = "\u20E3";
    /** U+200D; sequences containing it are kept only in the specific cases listed below. */
    public static final String ZWJ = "\u200D";

    /** Code points U+1F1E6..U+1F1FF; any sequence containing one is treated as constructed. */
    public static final UnicodeSet REGIONAL_INDICATORS = new UnicodeSet(0x1F1E6, 0x1F1FF).freeze();
    /** Modifier characters; always added to the non-constructed set once loading is done. */
    public static final UnicodeSet MODIFIERS = new UnicodeSet("[🏻-🏿]").freeze();
    /** Code points U+E0000..U+E007F; any sequence containing one is treated as constructed. */
    public static final UnicodeSet TAGS = new UnicodeSet(0xE0000, 0xE007F).freeze();
    /** A multi-code-point sequence built only from these characters is treated as constructed. */
    public static final UnicodeSet FAMILY = new UnicodeSet("[\u200D 👦-👩 💋 ❤]").freeze();
    /** U+2640 and U+2642; a ZWJ sequence containing either one is kept. */
    public static final UnicodeSet GENDER = new UnicodeSet().add(0x2640).add(0x2642).freeze();
    /** ZWJ sequences that are kept as-is. */
    public static final UnicodeSet SPECIALS = new UnicodeSet("[{🏳‍🌈}{👁‍🗨}]").freeze();
    /** Permitted first code point of a kept ZWJ sequence that ends in an {@link #OBJECT}. */
    public static final UnicodeSet MAN_WOMAN = new UnicodeSet("[👨 👩]").freeze();
    /** Permitted last code point of a kept ZWJ sequence that starts with a {@link #MAN_WOMAN}. */
    public static final UnicodeSet OBJECT = new UnicodeSet("[👩 🎓 🌾 🍳 🏫 🏭 🎨 🚒 ✈ 🚀 🎤 💻 🔬 💼 🔧 ⚖ ⚕]").freeze();

    /** Emoji sequence mapped to the most recent "group:" header seen before it in the data file. */
    static final UnicodeMap<String> emojiToMajorCategory = new UnicodeMap<>();
    /** Emoji sequence mapped to the most recent "subgroup:" header seen before it in the data file. */
    static final UnicodeMap<String> emojiToMinorCategory = new UnicodeMap<>();
    /** Subgroup name mapped to the zero-based position of its first appearance in the data file. */
    static final Map<String, Integer> minorToOrder = new HashMap<>();
    /** Sequences (variation selector removed) that passed the constructed-sequence filter. */
    static final UnicodeSet nonConstructed = new UnicodeSet();

    static {
        // Shape of the input being parsed:
        //
        //   # group: Smileys & People
        //   # subgroup: face-positive
        //   1F600 ; fully-qualified # 😀 grinning face
        final String groupTag = "group:";
        final String subgroupTag = "subgroup:";
        Splitter fieldSplitter = Splitter.on(';').trimResults();
        String currentGroup = null;
        String currentSubgroup = null;

        for (String rawLine : FileUtilities.in(Emoji.class, "data/emoji/emoji-test.txt")) {
            if (rawLine.startsWith("#")) {
                // Comment line: only the group / subgroup headers carry information.
                String comment = rawLine.substring(1).trim();
                if (comment.startsWith(groupTag)) {
                    currentGroup = comment.substring(groupTag.length()).trim();
                } else if (comment.startsWith(subgroupTag)) {
                    currentSubgroup = comment.substring(subgroupTag.length()).trim();
                    if (!minorToOrder.containsKey(currentSubgroup)) {
                        minorToOrder.put(currentSubgroup, minorToOrder.size());
                    }
                }
            } else {
                String dataLine = rawLine.trim();
                if (!dataLine.isEmpty()) {
                    // Only the first ';'-separated field (the hex code points) is used;
                    // the qualification status in the second field is not examined.
                    String hexField = fieldSplitter.split(dataLine).iterator().next();
                    String sequence = Utility.fromHex(hexField, 4, " ");
                    emojiToMajorCategory.put(sequence, currentGroup);
                    emojiToMinorCategory.put(sequence, currentSubgroup);

                    // Collect the non-constructed values into a set for annotations.
                    String stripped = sequence.replace(EMOJI_VARIANT, "");
                    if (isNonConstructed(stripped)) {
                        nonConstructed.add(stripped);
                    }
                }
            }
        }

        emojiToMajorCategory.freeze();
        emojiToMinorCategory.freeze();
        nonConstructed.add(MODIFIERS); // needed for names
        nonConstructed.freeze();
    }

    /**
     * Decides whether a sequence belongs in the non-constructed set.
     *
     * @param minimal an emoji sequence with {@link #EMOJI_VARIANT} already removed
     * @return {@code true} if the sequence should be added to the non-constructed set
     */
    private static boolean isNonConstructed(String minimal) {
        // Keycap, regional-indicator and tag sequences are always constructed.
        if (minimal.contains(COMBINING_ENCLOSING_KEYCAP)
            || REGIONAL_INDICATORS.containsSome(minimal)
            || TAGS.containsSome(minimal)) {
            return false;
        }

        // Multi-code-point sequences carrying a modifier, or made up purely of
        // FAMILY characters, are constructed as well.
        boolean multipleCodePoints = CharSequences.getSingleCodePoint(minimal) == Integer.MAX_VALUE;
        if (multipleCodePoints
            && (MODIFIERS.containsSome(minimal) || FAMILY.containsAll(minimal))) {
            return false;
        }

        // Only certain ZWJ sequences are kept.
        if (minimal.contains(ZWJ)) {
            if (SPECIALS.contains(minimal) || GENDER.containsSome(minimal)) {
                return true;
            }
            int firstCodePoint = minimal.codePointAt(0);
            int lastCodePoint = minimal.codePointBefore(minimal.length());
            return MAN_WOMAN.contains(firstCodePoint) && OBJECT.contains(lastCodePoint);
        }

        // Everything else is kept, except sequences containing the keycap-ten character.
        return !minimal.contains("🔟");
    }

    /**
     * Looks up the subgroup recorded for an emoji.
     *
     * @param emoji the emoji sequence to look up
     * @return the subgroup name, or {@code "Component"} when none was recorded
     */
    public static String getMinorCategory(String emoji) {
        String subgroup = emojiToMinorCategory.get(emoji);
        return CldrUtility.ifNull(subgroup, "Component");
    }

    /**
     * Gives the position at which a subgroup first appeared in the data file.
     *
     * @param minor the subgroup name
     * @return the zero-based position, or {@link Integer#MAX_VALUE} for an unknown subgroup
     */
    public static int getMinorToOrder(String minor) {
        Integer position = minorToOrder.get(minor);
        if (position == null) {
            return Integer.MAX_VALUE;
        }
        return position;
    }

    /**
     * Looks up the group recorded for an emoji.
     *
     * @param emoji the emoji sequence to look up
     * @return the group name, or {@code "Component"} when none was recorded
     */
    public static String getMajorCategory(String emoji) {
        String group = emojiToMajorCategory.get(emoji);
        return CldrUtility.ifNull(group, "Component");
    }

    /** @return the values held by the emoji-to-group map */
    public static Set<String> getMajorCategories() {
        return emojiToMajorCategory.values();
    }

    /** @return the values held by the emoji-to-subgroup map */
    public static Set<String> getMinorCategories() {
        return emojiToMinorCategory.values();
    }

    /** @return the frozen set of non-constructed sequences, including {@link #MODIFIERS} */
    public static UnicodeSet getNonConstructed() {
        return nonConstructed;
    }

    // Path sets are built on first request and then reused.
    private static Set<String> NAME_PATHS = null;
    private static Set<String> KEYWORD_PATHS = null;

    /** Attribute suffix appended to an annotation path to form a name path. */
    public static final String TYPE_TTS = "[@type=\"tts\"]";

    /** @return annotation paths carrying the {@link #TYPE_TTS} suffix, one per non-constructed entry */
    public static synchronized Set<String> getNamePaths() {
        if (NAME_PATHS == null) {
            NAME_PATHS = buildPaths(TYPE_TTS);
        }
        return NAME_PATHS;
    }

    /** @return annotation paths without any suffix, one per non-constructed entry */
    public static synchronized Set<String> getKeywordPaths() {
        if (KEYWORD_PATHS == null) {
            KEYWORD_PATHS = buildPaths("");
        }
        return KEYWORD_PATHS;
    }

    /**
     * Builds one annotation path per entry of the non-constructed set.
     *
     * @param suffix text appended after the {@code [@cp="..."]} attribute of every path
     * @return the immutable set of resulting paths
     */
    private static ImmutableSet<String> buildPaths(String suffix) {
        ImmutableSet.Builder<String> paths = ImmutableSet.builder();
        for (String entry : Emoji.getNonConstructed()) {
            paths.add("//ldml/annotations/annotation[@cp=\"" + entry + "\"]" + suffix);
        }
        return paths.build();
    }
}