blob: 1e0b3959f3978dccf0f35cc757b07b268488d910 [file] [log] [blame]
#!/usr/bin/python
# coding=UTF-8
#
# Copyright 2014 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Create a curated subset of NotoSansSymbols for Android."""
__author__ = 'roozbeh@google.com (Roozbeh Pournader)'
import os
import sys
from nototools import subset
from nototools import unicode_data
# Unicode blocks that we want to include in the font.
# Each line has the form "FIRST..LAST; Block Name", with FIRST and LAST given
# as hexadecimal code points. main() parses this text with
# unicode_data._parse_code_ranges() and adds every code point from FIRST
# through LAST (inclusive) to the target coverage.
BLOCKS_TO_INCLUDE = """
20D0..20FF; Combining Diacritical Marks for Symbols
2100..214F; Letterlike Symbols
2190..21FF; Arrows
2200..22FF; Mathematical Operators
2300..23FF; Miscellaneous Technical
2400..243F; Control Pictures
2440..245F; Optical Character Recognition
2460..24FF; Enclosed Alphanumerics
2500..257F; Box Drawing
2580..259F; Block Elements
25A0..25FF; Geometric Shapes
2600..26FF; Miscellaneous Symbols
2700..27BF; Dingbats
27C0..27EF; Miscellaneous Mathematical Symbols-A
27F0..27FF; Supplemental Arrows-A
2800..28FF; Braille Patterns
2900..297F; Supplemental Arrows-B
2980..29FF; Miscellaneous Mathematical Symbols-B
2A00..2AFF; Supplemental Mathematical Operators
2B00..2BFF; Miscellaneous Symbols and Arrows
4DC0..4DFF; Yijing Hexagram Symbols
10140..1018F; Ancient Greek Numbers
10190..101CF; Ancient Symbols
101D0..101FF; Phaistos Disc
1D000..1D0FF; Byzantine Musical Symbols
1D100..1D1FF; Musical Symbols
1D200..1D24F; Ancient Greek Musical Notation
1D300..1D35F; Tai Xuan Jing Symbols
1D360..1D37F; Counting Rod Numerals
1D400..1D7FF; Mathematical Alphanumeric Symbols
1F000..1F02F; Mahjong Tiles
1F030..1F09F; Domino Tiles
1F0A0..1F0FF; Playing Cards
1F700..1F77F; Alchemical Symbols
"""
# One-off characters to be included. At the moment, these are the Bitcoin sign
# (since it's not supported in Roboto yet) and the Japanese TV symbols of
# Unicode 9.
ONE_OFF_ADDITIONS = {
    0x20BF,   # ₿ BITCOIN SIGN
    0x1F19B,  # 🆛 SQUARED THREE D
    0x1F19C,  # 🆜 SQUARED SECOND SCREEN
    0x1F19D,  # 🆝 SQUARED TWO K
    0x1F19E,  # 🆞 SQUARED FOUR K
    0x1F19F,  # 🆟 SQUARED EIGHT K
    0x1F1A0,  # 🆠 SQUARED FIVE POINT ONE
    0x1F1A1,  # 🆡 SQUARED SEVEN POINT ONE
    0x1F1A2,  # 🆢 SQUARED TWENTY-TWO POINT TWO
    0x1F1A3,  # 🆣 SQUARED SIXTY P
    0x1F1A4,  # 🆤 SQUARED ONE HUNDRED TWENTY P
    0x1F1A5,  # 🆥 SQUARED LATIN SMALL LETTER D
    0x1F1A6,  # 🆦 SQUARED HC
    0x1F1A7,  # 🆧 SQUARED HDR
    0x1F1A8,  # 🆨 SQUARED HI-RES
    0x1F1A9,  # 🆩 SQUARED LOSSLESS
    0x1F1AA,  # 🆪 SQUARED SHV
    0x1F1AB,  # 🆫 SQUARED UHD
    0x1F1AC,  # 🆬 SQUARED VOD
    0x1F23B,  # 🈻 SQUARED CJK UNIFIED IDEOGRAPH-914D
}
# Letter-based characters that are provided by Roboto. main() subtracts
# these from the subset's coverage so that they come from Roboto instead.
LETTERLIKE_CHARS_IN_ROBOTO = {
    0x2100,  # ℀ ACCOUNT OF
    0x2101,  # ℁ ADDRESSED TO THE SUBJECT
    0x2103,  # ℃ DEGREE CELSIUS
    0x2105,  # ℅ CARE OF
    0x2106,  # ℆ CADA UNA
    0x2109,  # ℉ DEGREE FAHRENHEIT
    0x2113,  # ℓ SCRIPT SMALL L
    0x2116,  # № NUMERO SIGN
    0x2117,  # ℗ SOUND RECORDING COPYRIGHT
    0x211E,  # ℞ PRESCRIPTION TAKE
    0x211F,  # ℟ RESPONSE
    0x2120,  # ℠ SERVICE MARK
    0x2121,  # ℡ TELEPHONE SIGN
    0x2122,  # ™ TRADE MARK SIGN
    0x2123,  # ℣ VERSICLE
    0x2125,  # ℥ OUNCE SIGN
    0x2126,  # Ω OHM SIGN
    0x212A,  # K KELVIN SIGN
    0x212B,  # Å ANGSTROM SIGN
    0x212E,  # ℮ ESTIMATED SYMBOL
    0x2132,  # Ⅎ TURNED CAPITAL F
    0x213B,  # ℻ FACSIMILE SIGN
    0x214D,  # ⅍ AKTIESELSKAB
    0x214F,  # ⅏ SYMBOL FOR SAMARITAN SOURCE
}
# Result of unicode_data.get_presentation_default_emoji(); main() treats it as
# the set of characters that are supposed to default to emoji presentation.
DEFAULT_EMOJI = unicode_data.get_presentation_default_emoji()

# Path of the emoji additions data file, located relative to this script at
# ../../unicode/additions/emoji-data.txt.
EMOJI_ADDITIONS_FILE = os.path.join(
    os.path.dirname(__file__),
    os.path.pardir,
    os.path.pardir,
    'unicode',
    'additions',
    'emoji-data.txt')
# Characters that Android has decided to present emoji-style, despite
# UTR#51's recommendation.
def get_android_emoji():
    """Return additional Android default emojis.

    Reads EMOJI_ADDITIONS_FILE, parses its contents with
    unicode_data._parse_semicolon_separated_data(), and keeps each entry
    whose property field is 'Emoji_Presentation'.

    Returns:
      A set of integer code points.
    """
    with open(EMOJI_ADDITIONS_FILE) as additions_file:
        records = unicode_data._parse_semicolon_separated_data(
            additions_file.read())
    # The first field of each record is a hexadecimal code point string.
    return set(
        int(hex_codepoint, 16)
        for hex_codepoint, prop_name in records
        if prop_name == 'Emoji_Presentation')
def main(argv):
    """Subset the Noto Symbols font into two font files.

    Args:
      argv: Command-line style argument list. argv[1] is the source font
        file name, argv[2] is the target file name for the symbols subset,
        and argv[3] is the target file name for a second subset that holds
        the characters defaulting to emoji presentation.

    Raises:
      SystemExit: If fewer than three file names are supplied.
    """
    # Validate the arguments before doing any work, so that a missing output
    # name fails with a usage message instead of an IndexError raised after
    # the first subset has already been written.
    if len(argv) < 4:
        sys.exit(
            'usage: %s SOURCE_FONT SYMBOLS_SUBSET EMOJI_SUBSET'
            % (argv[0] if argv else '<script>'))
    source_file_name, target_file_name, second_subset_file_name = argv[1:4]

    # Characters that are supposed to default to emoji: the Unicode defaults
    # plus the Android-specific additions. Used both to trim the symbols
    # subset and as the coverage of the second subset.
    emoji_coverage = DEFAULT_EMOJI | get_android_emoji()

    target_coverage = set()
    # Add all characters in BLOCKS_TO_INCLUDE (ranges are inclusive).
    for first, last, _ in unicode_data._parse_code_ranges(BLOCKS_TO_INCLUDE):
        target_coverage.update(range(first, last + 1))

    # Add one-off characters
    target_coverage |= ONE_OFF_ADDITIONS

    # Remove characters preferably coming from Roboto
    target_coverage -= LETTERLIKE_CHARS_IN_ROBOTO

    # Remove characters that are supposed to default to emoji
    target_coverage -= emoji_coverage

    # Remove dentistry symbols, as their main use appears to be for CJK:
    # http://www.unicode.org/L2/L2000/00098-n2195.pdf
    target_coverage -= set(range(0x23BE, 0x23CC + 1))

    # Remove COMBINING ENCLOSING KEYCAP. It's needed for Android's color emoji
    # mechanism to work properly. discard() is used rather than remove() so
    # that this stays a no-op (instead of raising KeyError) if the emoji
    # removal above has already taken the character out.
    target_coverage.discard(0x20E3)

    subset.subset_font(
        source_file_name,
        target_file_name,
        include=target_coverage)

    subset.subset_font(
        source_file_name,
        second_subset_file_name,
        include=emoji_coverage)
# Script entry point: hand the full command line (including the program
# name at index 0) to main().
if __name__ == '__main__':
    main(sys.argv)