// blob: d8afcdc515579f35328bd80558280c7dc28d3ba4 [file] [log] [blame]
/*
* Copyright (c) 2006, 2007, 2008, 2009, 2010, 2012 Google Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Google Inc. nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "platform/fonts/win/FontFallbackWin.h"
#include "SkFontMgr.h"
#include "SkTypeface.h"
#include "wtf/HashMap.h"
#include "wtf/text/StringHash.h"
#include "wtf/text/WTFString.h"
#include <limits>
#include <unicode/locid.h>
#include <unicode/uchar.h>
namespace WebCore {
namespace {
static inline bool isFontPresent(const UChar* fontName, SkFontMgr* fontManager)
{
String family = fontName;
RefPtr<SkTypeface> tf = adoptRef(fontManager->legacyCreateTypeface(family.utf8().data(), SkTypeface::kNormal));
if (!tf)
return false;
SkTypeface::LocalizedStrings* actualFamilies = tf->createFamilyNameIterator();
bool matchesRequestedFamily = false;
SkTypeface::LocalizedString actualFamily;
while (actualFamilies->next(&actualFamily)) {
if (equalIgnoringCase(family, AtomicString::fromUTF8(actualFamily.fString.c_str()))) {
matchesRequestedFamily = true;
break;
}
}
actualFamilies->unref();
return matchesRequestedFamily;
}
// A simple mapping from UScriptCode to family name. The array is indexed
// directly by the UScriptCode value and holds one slot per possible script
// (USCRIPT_CODE_LIMIT slots in total). It is sparse in the sense that most
// slots stay null; a null slot means no family is recorded for that script.
// This works well since the range of UScriptCode values is small.
typedef const UChar* ScriptToFontMap[USCRIPT_CODE_LIMIT];
// Records the monospace family for each script listed in the table below.
// Slots of |scriptFontMap| for scripts that are not listed are left untouched.
// |fontManager| is accepted for signature parity with initializeScriptFontMap()
// but is not used here.
void initializeScriptMonospaceFontMap(ScriptToFontMap& scriptFontMap, SkFontMgr* fontManager)
{
    struct FontMap {
        UScriptCode script;
        const UChar* family;
    };
    static const FontMap monospaceFonts[] = {
        { USCRIPT_HEBREW, L"courier new" },
        { USCRIPT_ARABIC, L"courier new" },
    };

    const FontMap* const end = monospaceFonts + WTF_ARRAY_LENGTH(monospaceFonts);
    for (const FontMap* entry = monospaceFonts; entry != end; ++entry)
        scriptFontMap[entry->script] = entry->family;
}
// Populates |scriptFontMap| with the default (non-monospace) fallback family
// for every script this file knows about. Three passes are made:
//   1. Scripts with a single hard-coded family are copied in as-is; no check
//      is made that the font is actually installed.
//   2. Scripts with a list of candidate families get the first candidate for
//      which isFontPresent() returns true, or 0 when none of them is present.
//   3. USCRIPT_HAN is pointed at one of the CJK entries from pass 1, selected
//      by the ICU default locale.
// |fontManager| is only used for the isFontPresent() probes in pass 2.
void initializeScriptFontMap(ScriptToFontMap& scriptFontMap, SkFontMgr* fontManager)
{
    struct FontMap {
        UScriptCode script;
        const UChar* family;
    };
    static const FontMap fontMap[] = {
        {USCRIPT_LATIN, L"times new roman"},
        {USCRIPT_GREEK, L"times new roman"},
        {USCRIPT_CYRILLIC, L"times new roman"},
        // FIXME: Consider trying new Vista fonts before XP fonts for CJK.
        // Some Vista users do want to use Vista cleartype CJK fonts. If we
        // did, the results of tests with CJK characters would have to be
        // regenerated for Vista.
        {USCRIPT_SIMPLIFIED_HAN, L"simsun"},
        {USCRIPT_TRADITIONAL_HAN, L"pmingliu"},
        {USCRIPT_HIRAGANA, L"ms pgothic"},
        {USCRIPT_KATAKANA, L"ms pgothic"},
        {USCRIPT_KATAKANA_OR_HIRAGANA, L"ms pgothic"},
        {USCRIPT_HANGUL, L"gulim"},
        {USCRIPT_THAI, L"tahoma"},
        {USCRIPT_HEBREW, L"david"},
        {USCRIPT_ARABIC, L"tahoma"},
        {USCRIPT_DEVANAGARI, L"mangal"},
        {USCRIPT_BENGALI, L"vrinda"},
        {USCRIPT_GURMUKHI, L"raavi"},
        {USCRIPT_GUJARATI, L"shruti"},
        {USCRIPT_TAMIL, L"latha"},
        {USCRIPT_TELUGU, L"gautami"},
        {USCRIPT_KANNADA, L"tunga"},
        {USCRIPT_GEORGIAN, L"sylfaen"},
        {USCRIPT_ARMENIAN, L"sylfaen"},
        {USCRIPT_THAANA, L"mv boli"},
        {USCRIPT_CANADIAN_ABORIGINAL, L"euphemia"},
        {USCRIPT_CHEROKEE, L"plantagenet cherokee"},
        // The Windows font is spelled "Mongolian Baiti" (with an 'i').
        {USCRIPT_MONGOLIAN, L"mongolian baiti"},
        // For USCRIPT_COMMON, we map blocks to scripts when
        // that makes sense.
    };
    struct ScriptToFontFamilies {
        UScriptCode script;
        const UChar** families;
    };
    // Each candidate list below is 0-terminated and ordered by preference.
    //
    // Kartika on Vista or earlier lacks the support for Chillu
    // letters added to Unicode 5.1.
    // Try AnjaliOldLipi (a very widely used Malayalam font with the full
    // Unicode 5.x support) before falling back to Kartika.
    static const UChar* malayalamFonts[] = {L"AnjaliOldLipi", L"Lohit Malayalam", L"Kartika", L"Rachana", 0};
    // Try Khmer OS before Vista fonts because 'Khmer OS' goes along better
    // with Latin and looks better/larger for the same size.
    static const UChar* khmerFonts[] = {L"Khmer OS", L"MoolBoran", L"DaunPenh", L"Code2000", 0};
    // For the following 6 scripts, two or more fonts are listed. The fonts in
    // the 1st slot are not available on Windows XP. To support these
    // scripts on XP, listed in the rest of slots are widely used
    // fonts.
    static const UChar* ethiopicFonts[] = {L"Nyala", L"Abyssinica SIL", L"Ethiopia Jiret", L"Visual Geez Unicode", L"GF Zemen Unicode", 0};
    static const UChar* oriyaFonts[] = {L"Kalinga", L"ori1Uni", L"Lohit Oriya", 0};
    static const UChar* laoFonts[] = {L"DokChampa", L"Saysettha OT", L"Phetsarath OT", L"Code2000", 0};
    static const UChar* tibetanFonts[] = {L"Microsoft Himalaya", L"Jomolhari", L"Tibetan Machine Uni", 0};
    static const UChar* sinhalaFonts[] = {L"Iskoola Pota", L"AksharUnicode", 0};
    // The Windows font is spelled "Microsoft Yi Baiti" (with an 'i').
    static const UChar* yiFonts[] = {L"Microsoft Yi Baiti", L"Nuosu SIL", L"Code2000", 0};
    // http://www.bethmardutho.org/support/meltho/download/index.php
    static const UChar* syriacFonts[] = {L"Estrangelo Edessa", L"Estrangelo Nisibin", L"Code2000", 0};
    // No Myanmar/Burmese font is shipped with Windows, yet. Try a few
    // widely available/used ones that support Unicode 5.1 or later.
    static const UChar* myanmarFonts[] = {L"Padauk", L"Parabaik", L"Myanmar3", L"Code2000", 0};
    static const ScriptToFontFamilies scriptToFontFamilies[] = {
        {USCRIPT_MALAYALAM, malayalamFonts},
        {USCRIPT_KHMER, khmerFonts},
        {USCRIPT_ETHIOPIC, ethiopicFonts},
        {USCRIPT_ORIYA, oriyaFonts},
        {USCRIPT_LAO, laoFonts},
        {USCRIPT_TIBETAN, tibetanFonts},
        {USCRIPT_SINHALA, sinhalaFonts},
        {USCRIPT_YI, yiFonts},
        {USCRIPT_SYRIAC, syriacFonts},
        {USCRIPT_MYANMAR, myanmarFonts},
    };

    // Pass 1: single-family scripts.
    for (size_t i = 0; i < WTF_ARRAY_LENGTH(fontMap); ++i)
        scriptFontMap[fontMap[i].script] = fontMap[i].family;

    // Pass 2: pick the first installed candidate for multi-family scripts.
    // FIXME: Instead of scanning the hard-coded list, we have to
    // use EnumFont* to 'inspect' fonts to pick up fonts covering scripts
    // when it's possible (e.g. using OS/2 table). If we do that, this
    // had better be pulled out of here.
    for (size_t i = 0; i < WTF_ARRAY_LENGTH(scriptToFontFamilies); ++i) {
        UScriptCode script = scriptToFontFamilies[i].script;
        // Stays 0 when none of the candidates is present.
        scriptFontMap[script] = 0;
        const UChar** familyPtr = scriptToFontFamilies[i].families;
        while (*familyPtr) {
            if (isFontPresent(*familyPtr, fontManager)) {
                scriptFontMap[script] = *familyPtr;
                break;
            }
            ++familyPtr;
        }
    }

    // Pass 3: initialize the locale-dependent mapping.
    // Since Chrome synchronizes the ICU default locale with its UI locale,
    // this ICU locale tells the current UI locale of Chrome.
    icu::Locale locale = icu::Locale::getDefault();
    const UChar* localeFamily = 0;
    if (locale == icu::Locale::getJapanese()) {
        localeFamily = scriptFontMap[USCRIPT_HIRAGANA];
    } else if (locale == icu::Locale::getKorean()) {
        localeFamily = scriptFontMap[USCRIPT_HANGUL];
    } else if (locale == icu::Locale::getTraditionalChinese()) {
        localeFamily = scriptFontMap[USCRIPT_TRADITIONAL_HAN];
    } else {
        // For other locales, use the simplified Chinese font for Han.
        localeFamily = scriptFontMap[USCRIPT_SIMPLIFIED_HAN];
    }
    if (localeFamily)
        scriptFontMap[USCRIPT_HAN] = localeFamily;
}
// There are a lot of characters in USCRIPT_COMMON that can be covered
// by fonts for scripts closely related to them. See
// http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Script=Common:]
// FIXME: make this more efficient with a wider coverage
// Maps the Unicode block containing |ucs4| to a script, using the table below.
// Returns USCRIPT_COMMON for any block that is not listed.
UScriptCode getScriptBasedOnUnicodeBlock(int ucs4)
{
    struct BlockToScript {
        UBlockCode block;
        UScriptCode script;
    };
    static const BlockToScript blockToScript[] = {
        { UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION, USCRIPT_HAN },
        // Both kana blocks resolve to the Hiragana script.
        { UBLOCK_HIRAGANA, USCRIPT_HIRAGANA },
        { UBLOCK_KATAKANA, USCRIPT_HIRAGANA },
        { UBLOCK_ARABIC, USCRIPT_ARABIC },
        { UBLOCK_THAI, USCRIPT_THAI },
        { UBLOCK_GREEK, USCRIPT_GREEK },
        // For Danda and Double Danda (U+0964, U+0965), use a Devanagari
        // font for now although they're used by other scripts as well.
        // Without a context, we can't do any better.
        { UBLOCK_DEVANAGARI, USCRIPT_DEVANAGARI },
        { UBLOCK_ARMENIAN, USCRIPT_ARMENIAN },
        { UBLOCK_GEORGIAN, USCRIPT_GEORGIAN },
        { UBLOCK_KANNADA, USCRIPT_KANNADA },
    };

    const UBlockCode block = ublock_getCode(ucs4);
    for (size_t i = 0; i < WTF_ARRAY_LENGTH(blockToScript); ++i) {
        if (blockToScript[i].block == block)
            return blockToScript[i].script;
    }
    return USCRIPT_COMMON;
}
// Returns the script ICU reports for |ucs4|. When ICU signals an error, or the
// reported script is invalid, common or inherited (i.e. not greater than
// USCRIPT_INHERITED), the script is inferred from the character's Unicode
// block instead.
UScriptCode getScript(int ucs4)
{
    UErrorCode status = U_ZERO_ERROR;
    const UScriptCode detected = uscript_getScript(ucs4, &status);
    const bool isSpecificScript = detected > USCRIPT_INHERITED && !U_FAILURE(status);
    return isSpecificScript ? detected : getScriptBasedOnUnicodeBlock(ucs4);
}
} // namespace
// FIXME: this is font fallback code version 0.1
// - Cover all the scripts
// - Get the default font for each script/generic family from the
// preference instead of hardcoding in the source.
// (at least, read values from the registry for IE font settings).
// - Support generic families (from FontDescription)
// - If the default font for a script is not available,
// try some more fonts known to support it. Finally, we can
// use EnumFontFamilies or similar APIs to come up with a list of
// fonts supporting the script and cache the result.
// - Consider using UnicodeSet (or UnicodeMap) converted from
// GLYPHSET (BMP) or directly read from truetype cmap tables to
// keep track of which character is supported by which font
// - Update script_font_cache in response to WM_FONTCHANGE
// Returns the fallback family name recorded for |script|, or 0 when there is
// none.
//
// The two lookup tables are function-local statics and are filled on the first
// call, using the |fontManager| passed to that call; later calls reuse them.
// The initialization is guarded by a plain bool, so no synchronization is
// provided here.
//
// |generic|: when it is FontDescription::MonospaceFamily and a monospace
//     family is recorded for |script|, that family is returned in preference
//     to the regular one.
// Returns 0 for USCRIPT_INVALID_CODE and for any value outside
// [0, USCRIPT_CODE_LIMIT).
const UChar* getFontFamilyForScript(UScriptCode script,
    FontDescription::GenericFamilyType generic,
    SkFontMgr* fontManager)
{
    static ScriptToFontMap scriptFontMap;
    static ScriptToFontMap scriptMonospaceFontMap;
    static bool initialized = false;
    if (!initialized) {
        initializeScriptFontMap(scriptFontMap, fontManager);
        initializeScriptMonospaceFontMap(scriptMonospaceFontMap, fontManager);
        initialized = true;
    }
    if (script == USCRIPT_INVALID_CODE)
        return 0;
    ASSERT(script < USCRIPT_CODE_LIMIT);
    // ASSERT compiles out in release builds, so refuse out-of-range values
    // explicitly rather than indexing past the ends of the tables.
    if (script < 0 || script >= USCRIPT_CODE_LIMIT)
        return 0;
    if (generic == FontDescription::MonospaceFamily && scriptMonospaceFontMap[script])
        return scriptMonospaceFontMap[script];
    return scriptFontMap[script];
}
// FIXME:
// - Handle 'Inherited', 'Common' and 'Unknown'
// (see http://www.unicode.org/reports/tr24/#Usage_Model )
// For 'Inherited' and 'Common', perhaps we need to
// accept another parameter indicating the previous family
// and just return it.
// - All the characters (or characters up to the point a single
// font can cover) need to be taken into account
// Picks a fallback font family for a single |character|.
//
// |generic| is forwarded to getFontFamilyForScript().
// |scriptChecked|, when non-null, receives the script that was used for the
//     lookup.
// |fontManager| is forwarded to getFontFamilyForScript().
//
// Returns the family recorded for the character's script. When no family is
// recorded, or the character lies outside the BMP, a hard-coded family chosen
// by Unicode plane is returned instead, so the result is never null.
const UChar* getFallbackFamily(UChar32 character,
    FontDescription::GenericFamilyType generic,
    UScriptCode* scriptChecked,
    SkFontMgr* fontManager)
{
    ASSERT(character);
    UScriptCode script = getScript(character);

    // For the full-width ASCII characters (U+FF01 - U+FF5E), use the font for
    // Han (determined in a locale-dependent way when the script map is
    // built). Full-width ASCII characters are rather widely used in Japanese
    // and Chinese documents and they're fully covered by Chinese, Japanese
    // and Korean fonts.
    if (character > 0xFF00 && character < 0xFF5F)
        script = USCRIPT_HAN;
    if (script == USCRIPT_COMMON)
        script = getScriptBasedOnUnicodeBlock(character);

    const UChar* family = getFontFamilyForScript(script, generic, fontManager);

    // Another lame work-around to cover non-BMP characters.
    // If the font family for script is not found or the character is
    // not in BMP (> U+FFFF), we resort to the hard-coded list of
    // fallback fonts for now.
    const bool outsideBMP = character > 0xFFFF;
    if (outsideBMP || !family) {
        const int plane = character >> 16;
        if (plane == 1) {
            family = L"code2001";
        } else if (plane == 2) {
            // Use a Traditional Chinese ExtB font if in Traditional Chinese locale.
            // Otherwise, use a Simplified Chinese ExtB font. Windows Japanese
            // fonts do support a small subset of ExtB (that are included in JIS X 0213),
            // but its coverage is rather sparse.
            // Eventually, this should be controlled by lang/xml:lang.
            const bool isTraditionalChinese = icu::Locale::getDefault() == icu::Locale::getTraditionalChinese();
            family = isTraditionalChinese ? L"pmingliu-extb" : L"simsun-extb";
        } else {
            // Plane 0 without a recorded family, and planes 3 and above.
            family = L"lucida sans unicode";
        }
    }

    if (scriptChecked)
        *scriptChecked = script;
    return family;
}
} // namespace WebCore