emoji-compat/createfont.py - platform/external/noto-fonts - Git at Google

 #!/usr/bin/env python3
 #
 # Copyright (C) 2017 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #      http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

 """
 Creates the EmojiCompat font with the metadata. Metadata is embedded in FlatBuffers binary format
 under a meta tag with name 'Emji'.

 To create the final font, the following inputs are used:

 - NotoColorEmoji.ttf: Emoji font in the Android framework. Currently at
 external/noto-fonts/emoji/NotoColorEmoji.ttf

 - Unicode files: Unicode files that are in the framework, and lists information about all the
 emojis. These files are emoji-data.txt, emoji-sequences.txt, emoji-zwj-sequences.txt,
 and emoji-variation-sequences.txt. Currently at external/unicode/.

 - additions/emoji-zwj-sequences.txt: Includes emojis that are not defined in Unicode files, but are
 in the Android font. Resides in framework and currently under external/unicode/.

 - data/emoji_metadata.txt: The file that includes the id, codepoints, the first
 Android OS version that the emoji was added (sdkAdded), and finally the first EmojiCompat font
 version that the emoji was added (compatAdded). Updated when the script is executed.

 - data/emoji_metadata.fbs: The flatbuffer schema file. See http://google.github.io/flatbuffers/.

 After execution, the following files are generated if they don't exist; otherwise they are updated:
 - font/NotoColorEmojiCompat.ttf
 - supported-emojis/emojis.txt
 - data/emoji_metadata.txt
 - ../emoji-compat-flatbuffers/src/java/androidx/text/emoji/flatbuffer/*
 """

 import contextlib
 import csv
 import hashlib
 import itertools
 import json
 import os
 import re
 import shutil
 import subprocess
 import sys
 import tempfile
 from fontTools import ttLib
 from fontTools.ttLib.tables import otTables
 from nototools import font_data

 ########### UPDATE OR CHECK WHEN A NEW FONT IS BEING GENERATED ###########
 # Last Android SDK Version. Used as the default sdkAdded value of every emoji that is not found in
 # the previous metadata file.
 SDK_VERSION = 31
 # metadata version that will be embedded into font. If there are updates to the font that would
 # cause data/emoji_metadata.txt to change, this integer number should be incremented. This number
 # defines in which EmojiCompat metadata version the emoji is added to the font.
 METADATA_VERSION = 8

 ####### main directories where output files are created #######
 # absolute path of the directory that contains this script
 SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
 FONT_DIR = os.path.join(SCRIPT_DIR, 'font')
 DATA_DIR = os.path.join(SCRIPT_DIR, 'data')
 SUPPORTED_EMOJIS_DIR = os.path.join(SCRIPT_DIR, 'supported-emojis')
 # relative java source directory; it is joined onto FLATBUFFER_MODULE_DIR below
 JAVA_SRC_DIR = os.path.join('src', 'java')
 ####### output files #######
 # font file
 FONT_PATH = os.path.join(FONT_DIR, 'NotoColorEmojiCompat.ttf')
 # emoji metadata output file (space separated text)
 OUTPUT_META_FILE = os.path.join(DATA_DIR, 'emoji_metadata.txt')
 # emojis test file
 TEST_DATA_PATH = os.path.join(SUPPORTED_EMOJIS_DIR, 'emojis.txt')
 ####### input files #######
 # Unicode file names to read emoji data. They are resolved against the unicode directory that is
 # given on the command line.
 EMOJI_DATA_FILE = 'emoji-data.txt'
 EMOJI_SEQ_FILE = 'emoji-sequences.txt'
 EMOJI_ZWJ_FILE = 'emoji-zwj-sequences.txt'
 EMOJI_VARIATION_SEQ_FILE = 'emoji-variation-sequences.txt'
 # Android OS emoji file for emojis that are not in Unicode files
 ANDROID_EMOJI_ZWJ_SEQ_FILE = os.path.join('additions', 'emoji-zwj-sequences.txt')
 ANDROID_EMOJIS_SEQ_FILE = os.path.join('additions', 'emoji-sequences.txt')
 # Android OS emoji style override file. Codepoints that are rendered with emoji style by default
 # even though not defined so in emoji-data.txt.
 EMOJI_STYLE_OVERRIDE_FILE = os.path.join('additions', 'emoji-data.txt')
 # emoji metadata file. Same file as the output: the previous run's result is read before it is
 # overwritten.
 INPUT_META_FILE = OUTPUT_META_FILE
 # default flatbuffer module location (if not specified by caller)
 FLATBUFFER_MODULE_DIR = os.path.join(SCRIPT_DIR, '..', 'emoji-compat-flatbuffers')
 # flatbuffer schema
 FLATBUFFER_SCHEMA = os.path.join(FLATBUFFER_MODULE_DIR, 'data', 'emoji_metadata.fbs')
 # file path for java header, it will be prepended to flatbuffer java files
 FLATBUFFER_HEADER = os.path.join(FLATBUFFER_MODULE_DIR, 'data', 'flatbuffer_header.txt')
 # temporary emoji metadata json output file
 OUTPUT_JSON_FILE_NAME = 'emoji_metadata.json'
 # temporary binary file generated by flatbuffer
 FLATBUFFER_BIN = 'emoji_metadata.bin'
 # directory representation for flatbuffer java package. The trailing empty component makes the
 # path end with a separator, so file names can be appended with plain string concatenation.
 FLATBUFFER_PACKAGE_PATH = os.path.join('androidx', 'text', 'emoji', 'flatbuffer', '')
 # path, relative to the temporary output directory, that contains the generated flatbuffer java
 # files
 FLATBUFFER_JAVA_PATH = os.path.join(FLATBUFFER_PACKAGE_PATH)
 FLATBUFFER_METADATA_LIST_JAVA = "MetadataList.java"
 FLATBUFFER_METADATA_ITEM_JAVA = "MetadataItem.java"
 # directory under source where flatbuffer java files will be copied into
 FLATBUFFER_JAVA_TARGET = os.path.join(FLATBUFFER_MODULE_DIR, JAVA_SRC_DIR, FLATBUFFER_PACKAGE_PATH)
 # meta tag name used in the font to embed the emoji metadata. This value is also used in
 # MetadataListReader.java in order to locate the metadata location.
 EMOJI_META_TAG_NAME = 'Emji'

 # Emoji property names. They are upper case because read_emoji_lines upper-cases every line
 # before it is parsed.
 EMOJI_STR = 'EMOJI'
 EMOJI_PRESENTATION_STR = 'EMOJI_PRESENTATION'
 ACCEPTED_EMOJI_PROPERTIES = [EMOJI_PRESENTATION_STR, EMOJI_STR]
 # marker (upper-cased) of the emoji style lines in emoji-variation-sequences.txt
 STD_VARIANTS_EMOJI_STYLE = 'EMOJI STYLE'

 # first id that is given to an emoji when there is no previous metadata; start of Supplemental
 # Private Use Area-A
 DEFAULT_EMOJI_ID = 0xF0001
 # emoji style variation selector; it is removed from sequences before they are used as keys
 EMOJI_STYLE_VS = 0xFE0F

 # The reference code point to be used for filling metrics of watermark glyph
 WATERMARK_REF_CODE_POINT = 0x1F600
 # The code point and glyph name used for watermark.
 WATERMARK_NEW_CODE_POINT = 0x10FF00
 WATERMARK_NEW_GLYPH_ID = 'u10FF00'

 def to_hex_str(value):
     """Returns the uppercase hexadecimal representation of an int, without the 0x prefix."""
     return '{:X}'.format(value)

 def hex_str_to_int(string):
     """Parses a hexadecimal string such as '1F600' and returns its integer value.
     :raises ValueError: if the string is not a valid base 16 number
     """
     hex_base = 16
     return int(string, hex_base)

 def codepoint_to_string(codepoints):
     """Joins the uppercase hex form of each codepoint with a single space, i.e. [0x1F1E6, 0x1F1E8]
     becomes '1F1E6 1F1E8'. Accepts any iterable of ints."""
     hex_parts = map(to_hex_str, codepoints)
     return ' '.join(hex_parts)

 def prepend_header_to_file(file_path, header_path):
     """Rewrites file_path in place so that it starts with the content of header_path followed by
     a newline. Used to update flatbuffer java files with header, comments and annotations.
     :param file_path: file to be updated, it must already exist
     :param header_path: file whose content is put in front of the original content
     """
     with open(file_path, "r+") as target_file, open(header_path, "r") as header_file:
         body = target_file.read()
         # go back to the beginning; the new content is never shorter than the old one, therefore
         # no truncation is needed
         target_file.seek(0)
         header = header_file.read()
         target_file.write(header + "\n" + body)

 def is_ri(codepoint):
     """Returns True if the codepoint is in the inclusive range U+1F1E6..U+1F1FF, the regional
     indicator symbols that flag sequences are made of."""
     return 0x1F1E6 <= codepoint <= 0x1F1FF

 def is_flag_seq(codepoints):
     """Returns True if every codepoint of the sequence is a regional indicator (see is_ri). An
     empty sequence also returns True."""
     for codepoint in codepoints:
         if not is_ri(codepoint):
             return False
     return True


 def update_flatbuffer_java_files(flatbuffer_java_dir, header_dir, target_dir):
     """Prepends the header to the generated flatbuffer java files and copies them to their final
     destination.
     :param flatbuffer_java_dir: directory of the generated java files. File names are appended
     with plain string concatenation, so it has to end with a path separator.
     :param header_dir: path of the header file (it is passed to prepend_header_to_file as the
     header path)
     :param target_dir: destination directory, created if it does not exist
     """
     java_file_names = (FLATBUFFER_METADATA_LIST_JAVA, FLATBUFFER_METADATA_ITEM_JAVA)
     generated_files = [flatbuffer_java_dir + file_name for file_name in java_file_names]

     for generated_file in generated_files:
         prepend_header_to_file(generated_file, header_dir)

     if not os.path.exists(target_dir):
         os.makedirs(target_dir)

     for generated_file, file_name in zip(generated_files, java_file_names):
         shutil.copy(generated_file, os.path.join(target_dir, file_name))

 def create_test_data(unicode_path):
     """Collects every emoji listed in the unicode files and rewrites TEST_DATA_PATH with them, one
     space separated uppercase hex sequence per line, sorted as strings.
     :param unicode_path: path to directory that contains unicode files
     """
     def unicode_file(file_name):
         return os.path.join(unicode_path, file_name)

     sequence_lines = read_emoji_lines(unicode_file(EMOJI_ZWJ_FILE))
     sequence_lines += read_emoji_lines(unicode_file(EMOJI_SEQ_FILE))
     # the android additions are allowed to be missing
     sequence_lines += read_emoji_lines(unicode_file(ANDROID_EMOJI_ZWJ_SEQ_FILE), optional=True)
     sequence_lines += read_emoji_lines(unicode_file(ANDROID_EMOJIS_SEQ_FILE), optional=True)

     # standardized variants contains a huge list of sequences, only keep the ones that are marked
     # as emoji style (the ones with FE0F)
     sequence_lines.extend(
         variant_line
         for variant_line in read_emoji_lines(unicode_file(EMOJI_VARIATION_SEQ_FILE))
         if STD_VARIANTS_EMOJI_STYLE in variant_line)

     supported_emojis = set()
     for sequence_line in sequence_lines:
         # In unicode 12.0, "emoji-sequences.txt" contains "Basic_Emoji" section. It is skipped
         # since the emoji presentations are already covered by emoji-variation-sequences.txt.
         if "BASIC_EMOJI" in sequence_line:
             continue
         hex_fields = sequence_line.split(';')[0].strip().split(' ')
         sequence = [hex_str_to_int(x) for x in hex_fields]
         supported_emojis.add(codepoint_to_string(sequence).upper())

     # single codepoints are only added when they have emoji presentation by default
     for data_line in read_emoji_lines(unicode_file(EMOJI_DATA_FILE)):
         codepoints_range, emoji_property = codepoints_and_emoji_prop(data_line)
         if (emoji_property not in ACCEPTED_EMOJI_PROPERTIES
                 or emoji_property != EMOJI_PRESENTATION_STR):
             continue
         supported_emojis.update(
             [to_hex_str(x) for x in codepoints_for_emojirange(codepoints_range)])

     # finally add the android default emoji exceptions
     supported_emojis.update(
         [to_hex_str(x) for x in get_emoji_style_exceptions(unicode_path)])

     with open(TEST_DATA_PATH, "w") as test_file:
         for emoji in sorted(supported_emojis):
             test_file.write("%s\n" % emoji)

 class _EmojiData(object):
     """Holds the information about a single emoji."""

     def __init__(self, codepoints, is_emoji_style):
         self.codepoints = codepoints
         self.emoji_style = is_emoji_style
         self.emoji_id = 0
         self.width = 0
         self.height = 0
         self.sdk_added = SDK_VERSION
         self.compat_added = METADATA_VERSION

     def update_metrics(self, metrics):
         """Updates width/height instance variables with the values given in metrics dictionary.
         :param metrics: a dictionary object that has width and height values.
         """
         self.width = metrics.width
         self.height = metrics.height

     def __repr__(self):
         return '<EmojiData {0} - {1}>'.format(self.emoji_style,
                                               codepoint_to_string(self.codepoints))

     def create_json_element(self):
         """Creates the json representation of EmojiData."""
         json_element = {}
         json_element['id'] = self.emoji_id
         json_element['emojiStyle'] = self.emoji_style
         json_element['sdkAdded'] = self.sdk_added
         json_element['compatAdded'] = self.compat_added
         json_element['width'] = self.width
         json_element['height'] = self.height
         json_element['codepoints'] = self.codepoints
         return json_element

     def create_txt_row(self):
         """Creates array of values for CSV of EmojiData."""
         row = [to_hex_str(self.emoji_id), self.sdk_added, self.compat_added]
         row += [to_hex_str(x) for x in self.codepoints]
         return row

     def update(self, emoji_id, sdk_added, compat_added):
         """Updates current EmojiData with the values in a json element"""
         self.emoji_id = emoji_id
         self.sdk_added = sdk_added
         self.compat_added = compat_added


 def read_emoji_lines(file_path, optional=False):
     """Read all lines in an unicode emoji file into a list of uppercase strings. Ignore the empty
     lines and comments. Lines are stripped of surrounding whitespace; a comment is a line whose
     first non-blank character is '#'.
     :param file_path: unicode emoji file path
     :param optional: if True no exception is raised when the file cannot be read
     :return: list of uppercase strings
     :raises OSError: if the file cannot be read and optional is False
     """
     result = []
     try:
         # The unicode emoji files carry the emoji characters themselves in their comments.
         # Decode explicitly as UTF-8 instead of relying on the locale's default encoding, which
         # would fail with UnicodeDecodeError (not an OSError) on non UTF-8 systems.
         with open(file_path, encoding='utf-8') as file_stream:
             for line in file_stream:
                 line = line.strip()
                 if line and not line.startswith('#'):
                     result.append(line.upper())
     except OSError:
         # a missing or unreadable file is only tolerated for optional inputs
         if not optional:
             raise

     return result

 def get_emoji_style_exceptions(unicode_path):
     """Reads EMOJI_STYLE_OVERRIDE_FILE under unicode_path and returns its codepoints as a list of
     integers. The first field of each line is parsed as a single hex number, therefore a range
     (XXXX..YYYY) in this file raises ValueError."""
     override_path = os.path.join(unicode_path, EMOJI_STYLE_OVERRIDE_FILE)
     return [hex_str_to_int(codepoints_and_emoji_prop(line)[0])
             for line in read_emoji_lines(override_path)]

 def codepoints_for_emojirange(codepoints_range):
     """Returns the list of integer codepoints described by the first field of an emoji file
     line. The field is either a single hex codepoint or an inclusive range such as
     1F93C..1F93E, which is expanded.
     """
     if '..' not in codepoints_range:
         return [hex_str_to_int(codepoints_range)]

     first, last = codepoints_range.split('..')
     # + 1 since the end of the range is inclusive
     return list(range(hex_str_to_int(first), hex_str_to_int(last) + 1))

 def codepoints_and_emoji_prop(line):
     """Splits an emoji file line into its codepoints field and its emoji property field. Expected
     line format:
     1F93C..1F93E ; [Emoji|Emoji_Presentation|Emoji_Modifier_Base|Emoji_Component
     |Extended_Pictographic] # [...]
     :return: tuple of (codepoints or codepoint range string, emoji property string)
     :raises ValueError: if the line does not contain a '#' comment
     """
     # everything after the first '#' is a comment
     data_part, hash_sign, _ = line.strip().partition('#')
     if not hash_sign:
         raise ValueError("Line is expected to have # in it")

     fields = data_part.split(';')
     return fields[0].strip(), fields[1].strip()

 def read_emoji_intervals(emoji_data_map, file_path, emoji_style_exceptions):
     """Reads an unicode emoji file in which each line describes a codepoint or a codepoint
     interval, and inserts an EmojiData for each codepoint into emoji_data_map. Only the lines
     whose property is in ACCEPTED_EMOJI_PROPERTIES are used. Expected line format:
     1F93C..1F93E ; [Emoji|Emoji_Presentation|Emoji_Modifier_Base|Emoji_Component
     |Extended_Pictographic] # [...]
     :param emoji_data_map: map of codepoint string to EmojiData, updated in place
     :param file_path: unicode emoji file path
     :param emoji_style_exceptions: codepoints that are emoji style regardless of the file
     """
     for line in read_emoji_lines(file_path):
         codepoints_range, emoji_property = codepoints_and_emoji_prop(line)
         if emoji_property not in ACCEPTED_EMOJI_PROPERTIES:
             continue
         line_is_emoji_style = emoji_property == EMOJI_PRESENTATION_STR

         for codepoint in codepoints_for_emojirange(codepoints_range):
             key = codepoint_to_string([codepoint])
             use_emoji_style = line_is_emoji_style or codepoint in emoji_style_exceptions
             if key not in emoji_data_map:
                 emoji_data_map[key] = _EmojiData([codepoint], use_emoji_style)
             elif use_emoji_style:
                 # a codepoint is defined on multiple lines; emoji style is only ever turned
                 # on, never turned off again
                 emoji_data_map[key].emoji_style = True


 def read_emoji_sequences(emoji_data_map, file_path, optional=False, filter=None):
     """Reads a file of emoji sequences and adds an EmojiData (with emoji style False) to
     emoji_data_map for each sequence that is not in the map yet. Expected line format:
     1F1E6 1F1E8 ; Name ; [...]
     :param emoji_data_map: map of codepoint string to EmojiData, updated in place
     :param file_path: unicode emoji sequence file path
     :param optional: if True a file that cannot be read is silently ignored
     :param filter: optional callable that receives the codepoint list; the sequence is skipped
     when it returns a true value
     """
     for line in read_emoji_lines(file_path, optional):
         # In unicode 12.0, "emoji-sequences.txt" contains "Basic_Emoji" section. It is skipped
         # since the emoji presentations are already covered by emoji-variation-sequences.txt.
         if "BASIC_EMOJI" in line:
             continue

         hex_fields = line.split(';')[0].strip().split(' ')
         all_codepoints = [hex_str_to_int(x) for x in hex_fields]
         # the emoji style variation selector is not part of the key
         codepoints = [x for x in all_codepoints if x != EMOJI_STYLE_VS]
         if filter and filter(codepoints):
             continue

         key = codepoint_to_string(codepoints)
         if key in emoji_data_map:
             continue
         emoji_data_map[key] = _EmojiData(codepoints, False)


 def load_emoji_data_map(unicode_path, without_flags):
     """Reads the emoji data files, constructs a map of space separated codepoints to EmojiData.
     :param unicode_path: path to directory that contains unicode files
     :param without_flags: if True the flag sequences of EMOJI_SEQ_FILE are left out
     :return: map of space separated codepoints to EmojiData
     """
     flag_filter = is_flag_seq if without_flags else None

     emoji_data_map = {}
     emoji_style_exceptions = get_emoji_style_exceptions(unicode_path)
     read_emoji_intervals(emoji_data_map, os.path.join(unicode_path, EMOJI_DATA_FILE),
                          emoji_style_exceptions)

     # (file name, optional, filter) in the order they are read; the android additions are read
     # only if they exist
     sequence_sources = (
         (EMOJI_ZWJ_FILE, False, None),
         (EMOJI_SEQ_FILE, False, flag_filter),
         (ANDROID_EMOJI_ZWJ_SEQ_FILE, True, None),
         (ANDROID_EMOJIS_SEQ_FILE, True, None),
     )
     for file_name, is_optional, sequence_filter in sequence_sources:
         read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, file_name),
                              optional=is_optional, filter=sequence_filter)

     return emoji_data_map


 def load_previous_metadata(emoji_data_map):
     """Updates emoji data elements in emoji_data_map using the id, sdk_added and compat_added fields
        in emoji_metadata.txt. Returns the smallest available emoji id to use. i.e. if the largest
        emoji id emoji_metadata.txt is 1, function would return 2. If emoji_metadata.txt does not
        exist, or contains no emojis defined returns DEFAULT_EMOJI_ID.

        Rows are space separated: hex id, sdkAdded, compatAdded, then the hex codepoints. Rows
        that start with '#' and blank lines are ignored. Rows whose codepoints are not in
        emoji_data_map are ignored as well.
        :param emoji_data_map: map of space separated codepoints to EmojiData, updated in place
        :return: the next emoji id that is free to use
        """
     current_emoji_id = DEFAULT_EMOJI_ID
     if not os.path.isfile(INPUT_META_FILE):
         return current_emoji_id

     # newline='' is what the csv module asks for, so that it handles line endings itself
     with open(INPUT_META_FILE, newline='') as csvfile:
         for row in csv.reader(csvfile, delimiter=' '):
             # csv.reader returns an empty list for a blank line, row[0] would raise IndexError
             if not row or row[0].startswith('#'):
                 continue
             emoji_id = hex_str_to_int(row[0])
             sdk_added = int(row[1])
             compat_added = int(row[2])
             key = codepoint_to_string(hex_str_to_int(x) for x in row[3:])
             if key in emoji_data_map:
                 emoji_data = emoji_data_map[key]
                 emoji_data.update(emoji_id, sdk_added, compat_added)
                 if emoji_data.emoji_id >= current_emoji_id:
                     current_emoji_id = emoji_data.emoji_id + 1

     return current_emoji_id


 def update_ttlib_orig_sort():
     """Replaces ttLib.sortedTagList with a closure that calls the original sort and then moves
     the meta table to the front. The replacement is module wide and stays in place after this
     function returns.

     NOTE(review): this only has an effect if fontTools resolves sortedTagList through the ttLib
     module when a font is saved - confirm against the fontTools version in use.
     """
     orig_sort = ttLib.sortedTagList

     def meta_first_table_sort(tag_list, table_order=None):
         """Sorts the tables with the original ttLib sort, then makes the meta table first."""
         tag_list = orig_sort(tag_list, table_order)
         # the patched sort is used for every font that is sorted afterwards; a font without a
         # meta table must keep the original order instead of failing in list.remove
         if 'meta' in tag_list:
             tag_list.remove('meta')
             tag_list.insert(0, 'meta')
         return tag_list

     ttLib.sortedTagList = meta_first_table_sort


 def inject_meta_into_font(ttf, flatbuffer_bin_filename):
     """Stores the content of the flatbuffer binary file in the meta table of the font under the
     EMOJI_META_TAG_NAME tag. The meta table is created if the font does not have one.
     :param ttf: the font object to be updated
     :param flatbuffer_bin_filename: path of the binary metadata file
     """
     if 'meta' not in ttf:
         meta_table_class = ttLib.getTableClass('meta')
         ttf['meta'] = meta_table_class()
     meta = ttf['meta']

     with open(flatbuffer_bin_filename, 'rb') as flatbuffer_bin_file:
         metadata_bytes = flatbuffer_bin_file.read()
         meta.data[EMOJI_META_TAG_NAME] = metadata_bytes

     # make the meta table the first table for faster access
     update_ttlib_orig_sort()


 def validate_input_files(font_path, unicode_path, flatbuffer_path):
     """Checks that the font file, the unicode directory with its required emoji files, and the
     flatbuffer directory with its schema and header files exist.
     :raises ValueError: for the first input that is missing
     """
     if not os.path.isfile(font_path):
         raise ValueError("Font file does not exist: " + font_path)

     if not os.path.isdir(unicode_path):
         raise ValueError(
             "Unicode directory does not exist or is not a directory " + unicode_path)

     required_emoji_files = (EMOJI_DATA_FILE, EMOJI_ZWJ_FILE, EMOJI_SEQ_FILE)
     for emoji_file in required_emoji_files:
         emoji_filename = os.path.join(unicode_path, emoji_file)
         if not os.path.isfile(emoji_filename):
             raise ValueError("Unicode emoji data file does not exist: " + emoji_filename)

     if not os.path.isdir(flatbuffer_path):
         raise ValueError(
             "Flatbuffer directory does not exist or is not a directory " + flatbuffer_path)

     # FLATBUFFER_SCHEMA and FLATBUFFER_HEADER are already absolute paths, so os.path.join returns
     # them unchanged and flatbuffer_path has no effect on these two checks.
     required_flatbuffer_files = (FLATBUFFER_SCHEMA, FLATBUFFER_HEADER)
     for flatbuffer_file in required_flatbuffer_files:
         flatbuffer_filename = os.path.join(flatbuffer_path, flatbuffer_file)
         if not os.path.isfile(flatbuffer_filename):
             raise ValueError("Flatbuffer file does not exist: " + flatbuffer_filename)


 def add_file_to_sha(sha_algo, file_path):
     """Feeds the binary content of the file into sha_algo, 8192 bytes at a time.
     :param sha_algo: hash object with an update method, i.e. hashlib.sha256()
     :param file_path: path of the file to be read
     """
     chunk_size = 8192
     with open(file_path, 'rb') as input_file:
         while True:
             chunk = input_file.read(chunk_size)
             if chunk == b'':
                 break
             sha_algo.update(chunk)

 def create_sha_from_source_files(font_paths):
     """Returns the hex SHA-256 digest of the content of the given files, hashed one after the
     other in the given order.
     :param font_paths: iterable of file paths
     """
     digest = hashlib.sha256()
     for source_path in font_paths:
         add_file_to_sha(digest, source_path)
     hex_digest = digest.hexdigest()
     return hex_digest


 class EmojiFontCreator(object):
     """Creates the EmojiCompat font"""

     def __init__(self, font_path, unicode_path, without_flags):
         """Validates the inputs and initializes an empty state.
         :param font_path: path to Android NotoColorEmoji font
         :param unicode_path: path to directory that contains unicode files
         :param without_flags: if True flag sequences are left out of the metadata
         :raises ValueError: if one of the input files or directories does not exist
         """
         validate_input_files(font_path, unicode_path, FLATBUFFER_MODULE_DIR)

         # inputs
         self.font_path = font_path
         self.unicode_path = unicode_path
         self.without_flags = without_flags

         # state that is filled while the font is being created
         self.emoji_data_map = {}
         self.glyph_to_image_metrics_map = {}
         self.remapped_codepoints = {}
         # next emoji id to assign, defaults to start of Supplemental Private Use Area-A
         self.emoji_id = DEFAULT_EMOJI_ID

     def update_emoji_data(self, codepoints, glyph_name):
         """Updates the existing EmojiData identified with codepoints; does nothing when the
         codepoints are not in emoji_data_map. The fields that are set are:
         - image width/height, from the metrics of glyph_name
         - emoji_id (if it does not exist), taken from the running self.emoji_id counter
         The emoji id to glyph name mapping is recorded in remapped_codepoints."""
         key = codepoint_to_string(codepoints)
         if key not in self.emoji_data_map:
             return

         emoji_data = self.emoji_data_map[key]
         emoji_data.update_metrics(self.glyph_to_image_metrics_map[glyph_name])
         # id 0 means that the emoji did not get an id from the previous metadata
         if emoji_data.emoji_id == 0:
             emoji_data.emoji_id = self.emoji_id
             self.emoji_id += 1
         self.remapped_codepoints[emoji_data.emoji_id] = glyph_name

     def read_cbdt(self, ttf):
         """Read image size data from CBDT."""
         cbdt = ttf['CBDT']
         for strike_data in cbdt.strikeData:
             for key, data in strike_data.items():
                 data.decompile()
                 self.glyph_to_image_metrics_map[key] = data.metrics

     def read_cmap12(self, ttf, glyph_to_codepoint_map):
         """Reads single code point emojis that are in cmap12, updates glyph_to_codepoint_map and
         finally clears all elements in CMAP 12"""
         cmap = ttf['cmap']
         for table in cmap.tables:
             if table.format == 12 and table.platformID == 3 and table.platEncID == 10:
                 for codepoint, glyph_name in table.cmap.items():
                     glyph_to_codepoint_map[glyph_name] = codepoint
                     self.update_emoji_data([codepoint], glyph_name)
                 return table
         raise ValueError("Font doesn't contain cmap with format:12, platformID:3 and platEncID:10")

     def read_gsub(self, ttf, glyph_to_codepoint_map):
         """Reads the emoji sequences defined in GSUB and clear all elements under GSUB"""
         gsub = ttf['GSUB']
         ligature_subtables = []
         context_subtables = []
         # this code is font dependent, implementing all gsub rules is out of scope of EmojiCompat
         # and would be expensive with little value
         for lookup in gsub.table.LookupList.Lookup:
             for subtable in lookup.SubTable:
                 if subtable.LookupType == 5:
                     context_subtables.append(subtable)
                 elif subtable.LookupType == 4:
                     ligature_subtables.append(subtable)

         for subtable in context_subtables:
             self.add_gsub_context_subtable(subtable, gsub.table.LookupList, glyph_to_codepoint_map)

         for subtable in ligature_subtables:
             self.add_gsub_ligature_subtable(subtable, glyph_to_codepoint_map)

     def add_gsub_context_subtable(self, subtable, lookup_list, glyph_to_codepoint_map):
         """Add substitutions defined as OpenType Context Substitution"""
         for sub_class_set in subtable.SubClassSet:
             if sub_class_set:
                 for sub_class_rule in sub_class_set.SubClassRule:
                     # prepare holder for substitution list. each rule will have a list that is added
                     # to the subs_list.
                     subs_list = len(sub_class_rule.SubstLookupRecord) * [None]
                     for record in sub_class_rule.SubstLookupRecord:
                         subs_list[record.SequenceIndex] = self.get_substitutions(lookup_list,
                                                                             record.LookupListIndex)
                     # create combinations or all lists. the combinations will be filtered by
                     # emoji_data_map. the first element that contain as a valid glyph will be used
                     # as the final glyph
                     combinations = list(itertools.product(*subs_list))
                     for seq in combinations:
                         glyph_names = [x["input"] for x in seq]
                         codepoints = [glyph_to_codepoint_map[x] for x in glyph_names]
                         outputs = [x["output"] for x in seq if x["output"]]
                         nonempty_outputs = list(filter(lambda x: x.strip() , outputs))
                         if len(nonempty_outputs) == 0:
                             print("Warning: no output glyph is set for " + str(glyph_names))
                             continue
                         elif len(nonempty_outputs) > 1:
                             print(
                                 "Warning: multiple glyph is set for "
                                     + str(glyph_names) + ", will use the first one")

                         glyph = nonempty_outputs[0]
                         self.update_emoji_data(codepoints, glyph)

     def get_substitutions(self, lookup_list, index):
         result = []
         for x in lookup_list.Lookup[index].SubTable:
             for input, output in x.mapping.items():
                 result.append({"input": input, "output": output})
         return result

     def add_gsub_ligature_subtable(self, subtable, glyph_to_codepoint_map):
         for name, ligatures in subtable.ligatures.items():
             for ligature in ligatures:
                 glyph_names = [name] + ligature.Component
                 codepoints = [glyph_to_codepoint_map[x] for x in glyph_names]
                 self.update_emoji_data(codepoints, ligature.LigGlyph)

     def write_metadata_json(self, output_json_file_path):
         """Writes the emojis, sorted by id, into a json file together with the metadata version
         and the SHA of the source files (font, metadata text file and flatbuffer schema).
         :param output_json_file_path: path of the json file to be written
         :return: number of emojis that are written
         """
         source_sha = create_sha_from_source_files(
             [self.font_path, OUTPUT_META_FILE, FLATBUFFER_SCHEMA])

         elements = []
         for emoji_data in sorted(self.emoji_data_map.values(), key=lambda x: x.emoji_id):
             # Do not add flags emoji data if this is for subset font.
             if self.without_flags and is_flag_seq(emoji_data.codepoints):
                 continue
             elements.append(emoji_data.create_json_element())

         output_json = {
             'version': METADATA_VERSION,
             'sourceSha': source_sha,
             'list': elements,
         }

         # write the new json file to be processed by FlatBuffers
         serialized = json.dumps(output_json, indent=4, sort_keys=True, separators=(',', ':'))
         with open(output_json_file_path, 'w') as json_file:
             json_file.write(serialized + '\n')

         return len(elements)

     def write_metadata_csv(self):
         """Writes emoji metadata into the space separated OUTPUT_META_FILE: a header row that
         starts with '#', then one row per emoji in increasing id order."""
         header_row = ['#id', 'sdkAdded', 'compatAdded', 'codepoints']
         with open(OUTPUT_META_FILE, 'w') as csvfile:
             csvwriter = csv.writer(csvfile, delimiter=' ')
             sorted_emojis = sorted(self.emoji_data_map.values(), key=lambda x: x.emoji_id)
             csvwriter.writerow(header_row)
             csvwriter.writerows(emoji_data.create_txt_row() for emoji_data in sorted_emojis)

     def add_watermark(self, ttf):
         """Adds a watermark glyph to the font for manual verification of the font version.

         The new glyph WATERMARK_NEW_GLYPH_ID is mapped to WATERMARK_NEW_CODE_POINT, gets the
         horizontal and vertical metrics of the glyph of WATERMARK_REF_CODE_POINT, and is expanded
         by a new Multiple Substitution lookup into the glyphs of the text 'major minor' of the
         font version. The lookup is appended to the 'ccmp' feature.

         :param ttf: the font object to be updated
         :raises ValueError: if the version string does not start with 'Version <major>.<minor>'
         or if the font does not have a ccmp feature
         """
         cmap = ttf.getBestCmap()
         gsub = ttf['GSUB'].table

         # Obtain Version string. Raw string so that \d and \. reach the regex engine without
         # being treated as (invalid) string escapes.
         m = re.search(r'^Version (\d*)\.(\d*)', font_data.font_version(ttf))
         if not m:
             raise ValueError('The font does not have proper version string.')
         major = m.group(1)
         minor = m.group(2)
         # Replace the dot with space since NotoColorEmoji does not have glyph for dot.
         glyphs = [cmap[ord(x)] for x in '%s %s' % (major, minor)]

         # Update Glyph metrics
         ttf.getGlyphOrder().append(WATERMARK_NEW_GLYPH_ID)
         refGlyphId = cmap[WATERMARK_REF_CODE_POINT]
         ttf['hmtx'].metrics[WATERMARK_NEW_GLYPH_ID] = ttf['hmtx'].metrics[refGlyphId]
         ttf['vmtx'].metrics[WATERMARK_NEW_GLYPH_ID] = ttf['vmtx'].metrics[refGlyphId]

         # Add new Glyph to cmap
         font_data.add_to_cmap(ttf, { WATERMARK_NEW_CODE_POINT : WATERMARK_NEW_GLYPH_ID })

         # Add lookup table for the version string.
         lookups = gsub.LookupList.Lookup
         new_lookup = otTables.Lookup()
         new_lookup.LookupType = 2  # Multiple Substitution Subtable.
         new_lookup.LookupFlag = 0
         new_subtable = otTables.MultipleSubst()
         new_subtable.mapping = { WATERMARK_NEW_GLYPH_ID : tuple(glyphs) }
         new_lookup.SubTable = [ new_subtable ]
         new_lookup_index = len(lookups)
         lookups.append(new_lookup)

         # Add feature. The None default is required: without it next() raises StopIteration
         # when there is no ccmp feature and the check below is never reached.
         feature = next(
             (x for x in gsub.FeatureList.FeatureRecord if x.FeatureTag == 'ccmp'), None)
         if not feature:
             raise ValueError("Font doesn't contain ccmp feature.")

         feature.Feature.LookupListIndex.append(new_lookup_index)

     def create_font(self):
         """Creates the EmojiCompat font from self.font_path and self.unicode_path.

         Writes FONT_PATH, OUTPUT_META_FILE, TEST_DATA_PATH and the flatbuffer java files under
         FLATBUFFER_JAVA_TARGET. Intermediate files are created in a temporary directory, which
         is removed when the method ends, also when the creation fails. The flatc executable is
         run as a subprocess.

         :raises subprocess.CalledProcessError: if flatc returns a non-zero exit status
         """
         tmp_dir = tempfile.mkdtemp()
         try:
             total_emoji_count = self._create_font_files(tmp_dir)
         finally:
             # clear the tmp output directory, even if one of the steps raised
             shutil.rmtree(tmp_dir, ignore_errors=True)

         print(
             "{0} emojis are written to\n{1}".format(total_emoji_count, FONT_DIR))

     def _create_font_files(self, tmp_dir):
         """Runs all the steps of the font creation, using tmp_dir for the intermediate files.
         :param tmp_dir: existing directory for the json, binary and java files created by the way
         :return: number of emojis that are written into the metadata
         """
         # create emoji codepoints to EmojiData map
         self.emoji_data_map = load_emoji_data_map(self.unicode_path, self.without_flags)

         # read previous metadata file to update id, sdkAdded and compatAdded. emoji id that is
         # returned is either default or 1 greater than the largest id in previous data
         self.emoji_id = load_previous_metadata(self.emoji_data_map)

         # recalcTimestamp parameter will keep the modified field same as the original font. Changing
         # the modified field in the font causes the font ttf file to change, which makes it harder
         # to understand if something really changed in the font.
         with contextlib.closing(ttLib.TTFont(self.font_path, recalcTimestamp=False)) as ttf:
             # read image size data
             self.read_cbdt(ttf)

             # glyph name to codepoint map
             glyph_to_codepoint_map = {}

             # read single codepoint emojis under cmap12
             cmap12_table = self.read_cmap12(ttf, glyph_to_codepoint_map)

             # read emoji sequences under gsub
             self.read_gsub(ttf, glyph_to_codepoint_map)

             # add all new codepoint to glyph mappings
             cmap12_table.cmap.update(self.remapped_codepoints)

             # final metadata csv will be used to generate the sha, therefore write it before
             # metadata json is written.
             self.write_metadata_csv()

             output_json_file = os.path.join(tmp_dir, OUTPUT_JSON_FILE_NAME)
             flatbuffer_bin_file = os.path.join(tmp_dir, FLATBUFFER_BIN)
             flatbuffer_java_dir = os.path.join(tmp_dir, FLATBUFFER_JAVA_PATH)

             total_emoji_count = self.write_metadata_json(output_json_file)

             # create the flatbuffers binary and java classes
             flatc_command = ['flatc',
                              '-o',
                              tmp_dir,
                              '-b',
                              '-j',
                              FLATBUFFER_SCHEMA,
                              output_json_file]
             subprocess.check_output(flatc_command)

             # inject metadata binary into font
             inject_meta_into_font(ttf, flatbuffer_bin_file)

             # add watermark glyph for manual verification.
             self.add_watermark(ttf)

             # update CBDT and CBLC versions since older android versions cannot read > 2.0
             ttf['CBDT'].version = 2.0
             ttf['CBLC'].version = 2.0

             # save the new font
             ttf.save(FONT_PATH)

             update_flatbuffer_java_files(flatbuffer_java_dir, #tmp dir
                                          FLATBUFFER_HEADER,
                                          FLATBUFFER_JAVA_TARGET)

             create_test_data(self.unicode_path)

         return total_emoji_count


 def print_usage():
     """Prints the command line usage of the script to standard output."""
     usage = ("Please specify a path to font and unicode files.\n"
              "usage: createfont.py noto-color-emoji-path unicode-dir-path")
     print(usage)

 def parse_args(argv):
     """Parses the command line arguments.

     Prints the usage and exits with status 1 when the font path or the unicode directory is
     missing.
     :param argv: argument list in sys.argv layout: script name, font path, unicode directory
     path and optionally '--without-flags'
     :return: tuple of (font path, unicode directory path, without_flags)
     """
     # parse manually to avoid any extra dependencies
     if len(argv) < 3:
         print_usage()
         sys.exit(1)

     # the flag is only recognized as the single argument after the two paths
     without_flags = len(argv) == 4 and argv[3] == '--without-flags'

     # use the given argument list, not sys.argv, so that the parameter is actually honored
     return (argv[1], argv[2], without_flags)

 def main():
     """Entry point of the script: parses sys.argv and creates the EmojiCompat font."""
     arguments = parse_args(sys.argv)
     creator = EmojiFontCreator(*arguments)
     creator.create_font()


 # Create the font only when the file is executed as a script, not when it is imported.
 if __name__ == '__main__':
     main()