blob: 9abe84d05c572b870a1021002a0b384ea0ef9bfc [file] [log] [blame]
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// #define VERBOSE_DEBUG
#define LOG_TAG "Minikin"
#include <algorithm>
#include <log/log.h>
#include "unicode/unistr.h"
#include "unicode/unorm2.h"
#include "FontLanguage.h"
#include "FontLanguageListCache.h"
#include "MinikinInternal.h"
#include <minikin/Emoji.h>
#include <minikin/FontCollection.h>
using std::vector;
namespace minikin {
template <typename T>
static inline T max(T a, T b) {
return a>b ? a : b;
}
const uint32_t EMOJI_STYLE_VS = 0xFE0F;
const uint32_t TEXT_STYLE_VS = 0xFE0E;
uint32_t FontCollection::sNextId = 0;
FontCollection::FontCollection(std::shared_ptr<FontFamily>&& typeface) : mMaxChar(0) {
std::vector<std::shared_ptr<FontFamily>> typefaces;
typefaces.push_back(typeface);
init(typefaces);
}
FontCollection::FontCollection(const vector<std::shared_ptr<FontFamily>>& typefaces) :
mMaxChar(0) {
init(typefaces);
}
void FontCollection::init(const vector<std::shared_ptr<FontFamily>>& typefaces) {
android::AutoMutex _l(gMinikinLock);
mId = sNextId++;
vector<uint32_t> lastChar;
size_t nTypefaces = typefaces.size();
#ifdef VERBOSE_DEBUG
ALOGD("nTypefaces = %zd\n", nTypefaces);
#endif
const FontStyle defaultStyle;
for (size_t i = 0; i < nTypefaces; i++) {
const std::shared_ptr<FontFamily>& family = typefaces[i];
if (family->getClosestMatch(defaultStyle).font == nullptr) {
continue;
}
const SparseBitSet& coverage = family->getCoverage();
mFamilies.push_back(family); // emplace_back would be better
if (family->hasVSTable()) {
mVSFamilyVec.push_back(family);
}
mMaxChar = max(mMaxChar, coverage.length());
lastChar.push_back(coverage.nextSetBit(0));
const std::unordered_set<AxisTag>& supportedAxes = family->supportedAxes();
mSupportedAxes.insert(supportedAxes.begin(), supportedAxes.end());
}
nTypefaces = mFamilies.size();
LOG_ALWAYS_FATAL_IF(nTypefaces == 0,
"Font collection must have at least one valid typeface");
LOG_ALWAYS_FATAL_IF(nTypefaces > 254,
"Font collection may only have up to 254 font families.");
size_t nPages = (mMaxChar + kPageMask) >> kLogCharsPerPage;
// TODO: Use variation selector map for mRanges construction.
// A font can have a glyph for a base code point and variation selector pair but no glyph for
// the base code point without variation selector. The family won't be listed in the range in
// this case.
for (size_t i = 0; i < nPages; i++) {
Range dummy;
mRanges.push_back(dummy);
Range* range = &mRanges.back();
#ifdef VERBOSE_DEBUG
ALOGD("i=%zd: range start = %zd\n", i, offset);
#endif
range->start = mFamilyVec.size();
for (size_t j = 0; j < nTypefaces; j++) {
if (lastChar[j] < (i + 1) << kLogCharsPerPage) {
const std::shared_ptr<FontFamily>& family = mFamilies[j];
mFamilyVec.push_back(static_cast<uint8_t>(j));
uint32_t nextChar = family->getCoverage().nextSetBit((i + 1) << kLogCharsPerPage);
#ifdef VERBOSE_DEBUG
ALOGD("nextChar = %d (j = %zd)\n", nextChar, j);
#endif
lastChar[j] = nextChar;
}
}
range->end = mFamilyVec.size();
}
// See the comment in Range for more details.
LOG_ALWAYS_FATAL_IF(mFamilyVec.size() >= 0xFFFF,
"Exceeded the maximum indexable cmap coverage.");
}
// Special scores for the font fallback.
const uint32_t kUnsupportedFontScore = 0;
const uint32_t kFirstFontScore = UINT32_MAX;
// Calculates a font score.
// The score of the font family is based on three subscores.
// - Coverage Score: How well the font family covers the given character or variation sequence.
// - Language Score: How well the font family is appropriate for the language.
// - Variant Score: Whether the font family matches the variant. Note that this variant is not the
// one in BCP47. This is our own font variant (e.g., elegant, compact).
//
// Then, there is a priority for these three subscores as follow:
// Coverage Score > Language Score > Variant Score
// The returned score reflects this priority order.
//
// Note that there are two special scores.
// - kUnsupportedFontScore: When the font family doesn't support the variation sequence or even its
// base character.
// - kFirstFontScore: When the font is the first font family in the collection and it supports the
// given character or variation sequence.
uint32_t FontCollection::calcFamilyScore(uint32_t ch, uint32_t vs, int variant, uint32_t langListId,
const std::shared_ptr<FontFamily>& fontFamily) const {
const uint32_t coverageScore = calcCoverageScore(ch, vs, fontFamily);
if (coverageScore == kFirstFontScore || coverageScore == kUnsupportedFontScore) {
// No need to calculate other scores.
return coverageScore;
}
const uint32_t languageScore = calcLanguageMatchingScore(langListId, *fontFamily);
const uint32_t variantScore = calcVariantMatchingScore(variant, *fontFamily);
// Subscores are encoded into 31 bits representation to meet the subscore priority.
// The highest 2 bits are for coverage score, then following 28 bits are for language score,
// then the last 1 bit is for variant score.
return coverageScore << 29 | languageScore << 1 | variantScore;
}
// Calculates a font score based on variation sequence coverage.
// - Returns kUnsupportedFontScore if the font doesn't support the variation sequence or its base
// character.
// - Returns kFirstFontScore if the font family is the first font family in the collection and it
// supports the given character or variation sequence.
// - Returns 3 if the font family supports the variation sequence.
// - Returns 2 if the vs is a color variation selector (U+FE0F) and if the font is an emoji font.
// - Returns 2 if the vs is a text variation selector (U+FE0E) and if the font is not an emoji font.
// - Returns 1 if the variation selector is not specified or if the font family only supports the
// variation sequence's base character.
uint32_t FontCollection::calcCoverageScore(uint32_t ch, uint32_t vs,
const std::shared_ptr<FontFamily>& fontFamily) const {
const bool hasVSGlyph = (vs != 0) && fontFamily->hasGlyph(ch, vs);
if (!hasVSGlyph && !fontFamily->getCoverage().get(ch)) {
// The font doesn't support either variation sequence or even the base character.
return kUnsupportedFontScore;
}
if ((vs == 0 || hasVSGlyph) && mFamilies[0] == fontFamily) {
// If the first font family supports the given character or variation sequence, always use
// it.
return kFirstFontScore;
}
if (vs == 0) {
return 1;
}
if (hasVSGlyph) {
return 3;
}
if (vs == EMOJI_STYLE_VS || vs == TEXT_STYLE_VS) {
const FontLanguages& langs = FontLanguageListCache::getById(fontFamily->langId());
bool hasEmojiFlag = false;
for (size_t i = 0; i < langs.size(); ++i) {
if (langs[i].getEmojiStyle() == FontLanguage::EMSTYLE_EMOJI) {
hasEmojiFlag = true;
break;
}
}
if (vs == EMOJI_STYLE_VS) {
return hasEmojiFlag ? 2 : 1;
} else { // vs == TEXT_STYLE_VS
return hasEmojiFlag ? 1 : 2;
}
}
return 1;
}
// Calculate font scores based on the script matching, subtag matching and primary langauge matching.
//
// 1. If only the font's language matches or there is no matches between requested font and
// supported font, then the font obtains a score of 0.
// 2. Without a match in language, considering subtag may change font's EmojiStyle over script,
// a match in subtag gets a score of 2 and a match in scripts gains a score of 1.
// 3. Regarding to two elements matchings, language-and-subtag matching has a score of 4, while
// language-and-script obtains a socre of 3 with the same reason above.
//
// If two languages in the requested list have the same language score, the font matching with
// higher priority language gets a higher score. For example, in the case the user requested
// language list is "ja-Jpan,en-Latn". The score of for the font of "ja-Jpan" gets a higher score
// than the font of "en-Latn".
//
// To achieve score calculation with priorities, the language score is determined as follows:
// LanguageScore = s(0) * 5^(m - 1) + s(1) * 5^(m - 2) + ... + s(m - 2) * 5 + s(m - 1)
// Here, m is the maximum number of languages to be compared, and s(i) is the i-th language's
// matching score. The possible values of s(i) are 0, 1, 2, 3 and 4.
uint32_t FontCollection::calcLanguageMatchingScore(
uint32_t userLangListId, const FontFamily& fontFamily) {
const FontLanguages& langList = FontLanguageListCache::getById(userLangListId);
const FontLanguages& fontLanguages = FontLanguageListCache::getById(fontFamily.langId());
const size_t maxCompareNum = std::min(langList.size(), FONT_LANGUAGES_LIMIT);
uint32_t score = 0;
for (size_t i = 0; i < maxCompareNum; ++i) {
score = score * 5u + langList[i].calcScoreFor(fontLanguages);
}
return score;
}
// Calculates a font score based on variant ("compact" or "elegant") matching.
// - Returns 1 if the font doesn't have variant or the variant matches with the text style.
// - No score if the font has a variant but it doesn't match with the text style.
uint32_t FontCollection::calcVariantMatchingScore(int variant, const FontFamily& fontFamily) {
return (fontFamily.variant() == 0 || fontFamily.variant() == variant) ? 1 : 0;
}
// Implement heuristic for choosing best-match font. Here are the rules:
// 1. If first font in the collection has the character, it wins.
// 2. Calculate a score for the font family. See comments in calcFamilyScore for the detail.
// 3. Highest score wins, with ties resolved to the first font.
// This method never returns nullptr.
const std::shared_ptr<FontFamily>& FontCollection::getFamilyForChar(uint32_t ch, uint32_t vs,
uint32_t langListId, int variant) const {
if (ch >= mMaxChar) {
return mFamilies[0];
}
Range range = mRanges[ch >> kLogCharsPerPage];
if (vs != 0) {
range = { 0, static_cast<uint16_t>(mFamilies.size()) };
}
#ifdef VERBOSE_DEBUG
ALOGD("querying range %zd:%zd\n", range.start, range.end);
#endif
int bestFamilyIndex = -1;
uint32_t bestScore = kUnsupportedFontScore;
for (size_t i = range.start; i < range.end; i++) {
const std::shared_ptr<FontFamily>& family =
vs == 0 ? mFamilies[mFamilyVec[i]] : mFamilies[i];
const uint32_t score = calcFamilyScore(ch, vs, variant, langListId, family);
if (score == kFirstFontScore) {
// If the first font family supports the given character or variation sequence, always
// use it.
return family;
}
if (score > bestScore) {
bestScore = score;
bestFamilyIndex = i;
}
}
if (bestFamilyIndex == -1) {
UErrorCode errorCode = U_ZERO_ERROR;
const UNormalizer2* normalizer = unorm2_getNFDInstance(&errorCode);
if (U_SUCCESS(errorCode)) {
UChar decomposed[4];
int len = unorm2_getRawDecomposition(normalizer, ch, decomposed, 4, &errorCode);
if (U_SUCCESS(errorCode) && len > 0) {
int off = 0;
U16_NEXT_UNSAFE(decomposed, off, ch);
return getFamilyForChar(ch, vs, langListId, variant);
}
}
return mFamilies[0];
}
return vs == 0 ? mFamilies[mFamilyVec[bestFamilyIndex]] : mFamilies[bestFamilyIndex];
}
// Characters where we want to continue using existing font run for (or stick to the next run if
// they start a string), even if the font does not support them explicitly. These are handled
// properly by Minikin or HarfBuzz even if the font does not explicitly support them and it's
// usually meaningless to switch to a different font to display them.
static bool doesNotNeedFontSupport(uint32_t c) {
return c == 0x00AD // SOFT HYPHEN
|| c == 0x034F // COMBINING GRAPHEME JOINER
|| c == 0x061C // ARABIC LETTER MARK
|| (0x200C <= c && c <= 0x200F) // ZERO WIDTH NON-JOINER..RIGHT-TO-LEFT MARK
|| (0x202A <= c && c <= 0x202E) // LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
|| (0x2066 <= c && c <= 0x2069) // LEFT-TO-RIGHT ISOLATE..POP DIRECTIONAL ISOLATE
|| c == 0xFEFF // BYTE ORDER MARK
|| isVariationSelector(c);
}
// Characters where we want to continue using existing font run instead of
// recomputing the best match in the fallback list.
static const uint32_t stickyWhitelist[] = {
'!',
',',
'-',
'.',
':',
';',
'?',
0x00A0, // NBSP
0x2010, // HYPHEN
0x2011, // NB_HYPHEN
0x202F, // NNBSP
0x2640, // FEMALE_SIGN,
0x2642, // MALE_SIGN,
0x2695, // STAFF_OF_AESCULAPIUS
};
static bool isStickyWhitelisted(uint32_t c) {
for (size_t i = 0; i < sizeof(stickyWhitelist) / sizeof(stickyWhitelist[0]); i++) {
if (stickyWhitelist[i] == c) return true;
}
return false;
}
static inline bool isCombining(uint32_t c) {
return (U_GET_GC_MASK(c) & U_GC_M_MASK) != 0;
}
bool FontCollection::hasVariationSelector(uint32_t baseCodepoint,
uint32_t variationSelector) const {
if (!isVariationSelector(variationSelector)) {
return false;
}
if (baseCodepoint >= mMaxChar) {
return false;
}
// Currently mRanges can not be used here since it isn't aware of the variation sequence.
for (size_t i = 0; i < mVSFamilyVec.size(); i++) {
if (mVSFamilyVec[i]->hasGlyph(baseCodepoint, variationSelector)) {
return true;
}
}
// TODO: We can remove this lock by precomputing color emoji information.
android::AutoMutex _l(gMinikinLock);
// Even if there is no cmap format 14 subtable entry for the given sequence, should return true
// for <char, text presentation selector> case since we have special fallback rule for the
// sequence. Note that we don't need to restrict this to already standardized variation
// sequences, since Unicode is adding variation sequences more frequently now and may even move
// towards allowing text and emoji variation selectors on any character.
if (variationSelector == TEXT_STYLE_VS) {
for (size_t i = 0; i < mFamilies.size(); ++i) {
if (!mFamilies[i]->isColorEmojiFamily() && mFamilies[i]->hasGlyph(baseCodepoint, 0)) {
return true;
}
}
}
return false;
}
constexpr uint32_t REPLACEMENT_CHARACTER = 0xFFFD;
void FontCollection::itemize(const uint16_t *string, size_t string_size, FontStyle style,
vector<Run>* result) const {
const uint32_t langListId = style.getLanguageListId();
int variant = style.getVariant();
const FontFamily* lastFamily = nullptr;
Run* run = nullptr;
if (string_size == 0) {
return;
}
const uint32_t kEndOfString = 0xFFFFFFFF;
uint32_t nextCh = 0;
uint32_t prevCh = 0;
size_t nextUtf16Pos = 0;
size_t readLength = 0;
U16_NEXT(string, readLength, string_size, nextCh);
if (U_IS_SURROGATE(nextCh)) {
nextCh = REPLACEMENT_CHARACTER;
}
do {
const uint32_t ch = nextCh;
const size_t utf16Pos = nextUtf16Pos;
nextUtf16Pos = readLength;
if (readLength < string_size) {
U16_NEXT(string, readLength, string_size, nextCh);
if (U_IS_SURROGATE(nextCh)) {
nextCh = REPLACEMENT_CHARACTER;
}
} else {
nextCh = kEndOfString;
}
bool shouldContinueRun = false;
if (doesNotNeedFontSupport(ch)) {
// Always continue if the character is a format character not needed to be in the font.
shouldContinueRun = true;
} else if (lastFamily != nullptr && (isStickyWhitelisted(ch) || isCombining(ch))) {
// Continue using existing font as long as it has coverage and is whitelisted.
shouldContinueRun = lastFamily->getCoverage().get(ch);
}
if (!shouldContinueRun) {
const std::shared_ptr<FontFamily>& family = getFamilyForChar(
ch, isVariationSelector(nextCh) ? nextCh : 0, langListId, variant);
if (utf16Pos == 0 || family.get() != lastFamily) {
size_t start = utf16Pos;
// Workaround for combining marks and emoji modifiers until we implement
// per-cluster font selection: if a combining mark or an emoji modifier is found in
// a different font that also supports the previous character, attach previous
// character to the new run. U+20E3 COMBINING ENCLOSING KEYCAP, used in emoji, is
// handled properly by this since it's a combining mark too.
if (utf16Pos != 0 &&
(isCombining(ch) || (isEmojiModifier(ch) && isEmojiBase(prevCh))) &&
family != nullptr && family->getCoverage().get(prevCh)) {
const size_t prevChLength = U16_LENGTH(prevCh);
if (run != nullptr) {
run->end -= prevChLength;
if (run->start == run->end) {
result->pop_back();
}
}
start -= prevChLength;
}
if (lastFamily == nullptr) {
// This is the first family ever assigned. We are either seeing the very first
// character (which means start would already be zero), or we have only seen
// characters that don't need any font support (which means we need to adjust
// start to be 0 to include those characters).
start = 0;
}
result->push_back({family->getClosestMatch(style), static_cast<int>(start), 0});
run = &result->back();
lastFamily = family.get();
}
}
prevCh = ch;
if (run != nullptr) {
run->end = nextUtf16Pos; // exclusive
}
} while (nextCh != kEndOfString);
if (lastFamily == nullptr) {
// No character needed any font support, so it doesn't really matter which font they end up
// getting displayed in. We put the whole string in one run, using the first font.
result->push_back({mFamilies[0]->getClosestMatch(style), 0, static_cast<int>(string_size)});
}
}
FakedFont FontCollection::baseFontFaked(FontStyle style) {
return mFamilies[0]->getClosestMatch(style);
}
std::shared_ptr<FontCollection> FontCollection::createCollectionWithVariation(
const std::vector<FontVariation>& variations) {
if (variations.empty() || mSupportedAxes.empty()) {
return nullptr;
}
bool hasSupportedAxis = false;
for (const FontVariation& variation : variations) {
if (mSupportedAxes.find(variation.axisTag) != mSupportedAxes.end()) {
hasSupportedAxis = true;
break;
}
}
if (!hasSupportedAxis) {
// None of variation axes are supported by this font collection.
return nullptr;
}
std::vector<std::shared_ptr<FontFamily> > families;
for (const std::shared_ptr<FontFamily>& family : mFamilies) {
std::shared_ptr<FontFamily> newFamily = family->createFamilyWithVariation(variations);
if (newFamily) {
families.push_back(newFamily);
} else {
families.push_back(family);
}
}
return std::shared_ptr<FontCollection>(new FontCollection(families));
}
uint32_t FontCollection::getId() const {
return mId;
}
} // namespace minikin