Merge "Reject unsorted cmap entries. DO NOT MERGE am: 891e1569fa -s ours" into lmp-mr1-dev am: abc4ac75da am: 33f42da9a9 -s ours am: a02dccd656 am: e4d2aeec0e am: 0fb4dfadd6 am: 5bc8e2aad7 -s ours am: 85f8b08e86 am: c741c242b6 am: 3c3c4a9c3c am: 5b013a6193 am: fa5c0e9773 -s ours Change-Id: Ic452b5992272071c9a477fce616fd1194a171e7d

commit: d6d6828f166a3d01c61451e453ef3169b98ee2f5 [log] [tgz]
author: Seigo Nonaka <nona@google.com> Wed Jul 12 19:18:31 2017 +0000
committer: android-build-merger <android-build-merger@google.com> Wed Jul 12 19:18:31 2017 +0000
tree: 7fd2dfe27fb8241ad9ef610a37ece5e1d43188c4
parent: d60d6464fa1815ac452a0c02a63f992b9f98e74b [diff]
parent: fa5c0e9773d282439272c2338d53ad517c7af42d [diff]
diff --git a/app/Android.mk b/app/Android.mk
index 2038683..23305b7 100644
--- a/app/Android.mk
+++ b/app/Android.mk

@@ -28,7 +28,7 @@
 
 LOCAL_SHARED_LIBRARIES := \
     liblog \
-    libicuuc-host
+    libicuuc
 
 LOCAL_SRC_FILES += \
     HyphTool.cpp

diff --git a/app/HyphTool.cpp b/app/HyphTool.cpp
index 730abad..403d374 100644
--- a/app/HyphTool.cpp
+++ b/app/HyphTool.cpp

@@ -2,14 +2,16 @@
 #include <sys/stat.h>
 #include <string.h>
 
+#include "unicode/locid.h"
 #include "utils/Log.h"
 
 #include <vector>
 #include <minikin/Hyphenator.h>
 
-using android::Hyphenator;
+using minikin::HyphenationType;
+using minikin::Hyphenator;
 
-Hyphenator* loadHybFile(const char* fn) {
+Hyphenator* loadHybFile(const char* fn, int minPrefix, int minSuffix) {
     struct stat statbuf;
     int status = stat(fn, &statbuf);
     if (status < 0) {
@@ -24,17 +26,18 @@
     }
     uint8_t* buf = new uint8_t[size];
     size_t read_size = fread(buf, 1, size, f);
+    fclose(f);
     if (read_size < size) {
         fprintf(stderr, "error reading %s\n", fn);
         delete[] buf;
         return nullptr;
     }
-    return Hyphenator::loadBinary(buf);
+    return Hyphenator::loadBinary(buf, minPrefix, minSuffix);
 }
 
 int main(int argc, char** argv) {
-    Hyphenator* hyph = loadHybFile("/tmp/en.hyb");  // should also be configurable
-    std::vector<uint8_t> result;
+    Hyphenator* hyph = loadHybFile("/tmp/en.hyb", 2, 3);  // should also be configurable
+    std::vector<HyphenationType> result;
     std::vector<uint16_t> word;
     if (argc < 2) {
         fprintf(stderr, "usage: hyphtool word\n");
@@ -50,9 +53,9 @@
         // ASCII (or possibly ISO Latin 1), but kinda painful to do utf conversion :(
         word.push_back(c);
     }
-    hyph->hyphenate(&result, word.data(), word.size());
+    hyph->hyphenate(&result, word.data(), word.size(), icu::Locale::getUS());
     for (size_t i = 0; i < len; i++) {
-        if (result[i] != 0) {
+        if (result[i] != HyphenationType::DONT_BREAK) {
             printf("-");
         }
         printf("%c", word[i]);

diff --git a/include/minikin/CmapCoverage.h b/include/minikin/CmapCoverage.h
index 56abac7..af5960d 100644
--- a/include/minikin/CmapCoverage.h
+++ b/include/minikin/CmapCoverage.h

@@ -19,14 +19,17 @@
 
 #include <minikin/SparseBitSet.h>
 
-namespace android {
+#include <memory>
+#include <vector>
+
+namespace minikin {
 
 class CmapCoverage {
 public:
-    static bool getCoverage(SparseBitSet &coverage, const uint8_t* cmap_data, size_t cmap_size,
-            bool* has_cmap_format14_subtable);
+    static SparseBitSet getCoverage(const uint8_t* cmap_data, size_t cmap_size,
+            std::vector<std::unique_ptr<SparseBitSet>>* out);
 };
 
-}  // namespace android
+}  // namespace minikin
 
 #endif  // MINIKIN_CMAP_COVERAGE_H

diff --git a/libs/minikin/MinikinRefCounted.cpp b/include/minikin/Emoji.h
similarity index 60%
rename from libs/minikin/MinikinRefCounted.cpp
rename to include/minikin/Emoji.h
index 9fa3ae4..2826173 100644
--- a/libs/minikin/MinikinRefCounted.cpp
+++ b/include/minikin/Emoji.h

@@ -14,22 +14,21 @@
  * limitations under the License.
  */
 
-// Base class for reference counted objects in Minikin
+#include <unicode/uchar.h>
 
-#include "MinikinInternal.h"
+namespace minikin {
 
-#include <minikin/MinikinRefCounted.h>
+// Returns true if c is emoji.
+bool isEmoji(uint32_t c);
 
-namespace android {
+// Returns true if c is emoji modifier base.
+bool isEmojiBase(uint32_t c);
 
-void MinikinRefCounted::Ref() {
-    AutoMutex _l(gMinikinLock);
-    this->RefLocked();
-}
+// Returns true if c is emoji modifier.
+bool isEmojiModifier(uint32_t c);
 
-void MinikinRefCounted::Unref() {
-    AutoMutex _l(gMinikinLock);
-    this->UnrefLocked();
-}
+// Bidi override for ICU that knows about new emoji.
+UCharDirection emojiBidiOverride(const void* context, UChar32 c);
 
-}
+}  // namespace minikin
+

diff --git a/include/minikin/FontCollection.h b/include/minikin/FontCollection.h
index c3c183d..138ba45 100644
--- a/include/minikin/FontCollection.h
+++ b/include/minikin/FontCollection.h

@@ -17,19 +17,19 @@
 #ifndef MINIKIN_FONT_COLLECTION_H
 #define MINIKIN_FONT_COLLECTION_H
 
+#include <memory>
+#include <unordered_set>
 #include <vector>
 
-#include <minikin/MinikinRefCounted.h>
 #include <minikin/MinikinFont.h>
 #include <minikin/FontFamily.h>
 
-namespace android {
+namespace minikin {
 
-class FontCollection : public MinikinRefCounted {
+class FontCollection {
 public:
-    explicit FontCollection(const std::vector<FontFamily*>& typefaces);
-
-    ~FontCollection();
+    explicit FontCollection(const std::vector<std::shared_ptr<FontFamily>>& typefaces);
+    explicit FontCollection(std::shared_ptr<FontFamily>&& typeface);
 
     struct Run {
         FakedFont fakedFont;
@@ -45,29 +45,45 @@
     // selector pair, or invalid variation selector is passed.
     bool hasVariationSelector(uint32_t baseCodepoint, uint32_t variationSelector) const;
 
-    // Get the base font for the given style, useful for font-wide metrics.
-    MinikinFont* baseFont(FontStyle style);
-
     // Get base font with fakery information (fake bold could affect metrics)
     FakedFont baseFontFaked(FontStyle style);
 
+    // Creates new FontCollection based on this collection while applying font variations. Returns
+    // nullptr if none of variations apply to this collection.
+    std::shared_ptr<FontCollection>
+            createCollectionWithVariation(const std::vector<FontVariation>& variations);
+
+    const std::unordered_set<AxisTag>& getSupportedTags() const {
+        return mSupportedAxes;
+    }
+
     uint32_t getId() const;
 
 private:
     static const int kLogCharsPerPage = 8;
     static const int kPageMask = (1 << kLogCharsPerPage) - 1;
 
+    // mFamilyVec holds the indices of the mFamilies and mRanges holds the range of indices of
+    // mFamilyVec. The maximum number of pages is 0x10FF (U+10FFFF >> 8). The maximum number of
+    // the fonts is 0xFF. Thus, technically the maximum length of mFamilyVec is 0x10EE01
+    // (0x10FF * 0xFF). However, in practice, 16-bit integers are enough since most fonts support
+    // only limited range of code points.
     struct Range {
-        size_t start;
-        size_t end;
+        uint16_t start;
+        uint16_t end;
     };
 
-    FontFamily* getFamilyForChar(uint32_t ch, uint32_t vs, uint32_t langListId, int variant) const;
+    // Initialize the FontCollection.
+    void init(const std::vector<std::shared_ptr<FontFamily>>& typefaces);
+
+    const std::shared_ptr<FontFamily>& getFamilyForChar(uint32_t ch, uint32_t vs,
+            uint32_t langListId, int variant) const;
 
     uint32_t calcFamilyScore(uint32_t ch, uint32_t vs, int variant, uint32_t langListId,
-                             FontFamily* fontFamily) const;
+            const std::shared_ptr<FontFamily>& fontFamily) const;
 
-    uint32_t calcCoverageScore(uint32_t ch, uint32_t vs, FontFamily* fontFamily) const;
+    uint32_t calcCoverageScore(uint32_t ch, uint32_t vs,
+            const std::shared_ptr<FontFamily>& fontFamily) const;
 
     static uint32_t calcLanguageMatchingScore(uint32_t userLangListId,
                                               const FontFamily& fontFamily);
@@ -83,21 +99,26 @@
     // Highest UTF-32 code point that can be mapped
     uint32_t mMaxChar;
 
-    // This vector has ownership of the bitsets and typeface objects.
+    // This vector has pointers to all the font family instances in this collection.
     // This vector can't be empty.
-    std::vector<FontFamily*> mFamilies;
+    std::vector<std::shared_ptr<FontFamily>> mFamilies;
 
-    // This vector contains pointers into mInstances
+    // Following two vectors are pre-calculated tables for resolving coverage faster.
+    // For example, to iterate over all fonts which support Unicode code point U+XXYYZZ,
+    // iterate font families index from mFamilyVec[mRanges[0xXXYY].start] to
+    // mFamilyVec[mRanges[0xXXYY].end] instead of whole mFamilies.
+    // This vector contains indices into mFamilies.
     // This vector can't be empty.
-    std::vector<FontFamily*> mFamilyVec;
-
-    // This vector has pointers to the font family instance which has cmap 14 subtable.
-    std::vector<FontFamily*> mVSFamilyVec;
-
-    // These are offsets into mInstanceVec, one range per page
     std::vector<Range> mRanges;
+    std::vector<uint8_t> mFamilyVec;
+
+    // This vector has pointers to the font family instances which have cmap 14 subtables.
+    std::vector<std::shared_ptr<FontFamily>> mVSFamilyVec;
+
+    // Set of supported axes in this collection.
+    std::unordered_set<AxisTag> mSupportedAxes;
 };
 
-}  // namespace android
+}  // namespace minikin
 
 #endif  // MINIKIN_FONT_COLLECTION_H

diff --git a/include/minikin/FontFamily.h b/include/minikin/FontFamily.h
index 81033d2..04c95bc 100644
--- a/include/minikin/FontFamily.h
+++ b/include/minikin/FontFamily.h

@@ -17,16 +17,18 @@
 #ifndef MINIKIN_FONT_FAMILY_H
 #define MINIKIN_FONT_FAMILY_H
 
-#include <vector>
+#include <memory>
 #include <string>
+#include <unordered_set>
+#include <vector>
+
 #include <hb.h>
 
 #include <utils/TypeHelpers.h>
 
-#include <minikin/MinikinRefCounted.h>
 #include <minikin/SparseBitSet.h>
 
-namespace android {
+namespace minikin {
 
 class MinikinFont;
 
@@ -37,7 +39,7 @@
 public:
     FontStyle() : FontStyle(0 /* variant */, 4 /* weight */, false /* italic */) {}
     FontStyle(int weight, bool italic) : FontStyle(0 /* variant */, weight, italic) {}
-    FontStyle(uint32_t langListId)
+    FontStyle(uint32_t langListId)  // NOLINT(implicit)
             : FontStyle(langListId, 0 /* variant */, 4 /* weight */, false /* italic */) {}
 
     FontStyle(int variant, int weight, bool italic);
@@ -52,7 +54,7 @@
           return bits == other.bits && mLanguageListId == other.mLanguageListId;
     }
 
-    hash_t hash() const;
+    android::hash_t hash() const;
 
     // Looks up a language list from an internal cache and returns its ID.
     // If the passed language list is not in the cache, registers it and returns newly assigned ID.
@@ -75,7 +77,7 @@
     VARIANT_ELEGANT = 2,
 };
 
-inline hash_t hash_type(const FontStyle &style) {
+inline android::hash_t hash_type(const FontStyle &style) {
     return style.hash();
 }
 
@@ -98,66 +100,80 @@
     FontFakery fakery;
 };
 
-class FontFamily : public MinikinRefCounted {
+typedef uint32_t AxisTag;
+
+struct Font {
+    Font(const std::shared_ptr<MinikinFont>& typeface, FontStyle style);
+    Font(std::shared_ptr<MinikinFont>&& typeface, FontStyle style);
+    Font(Font&& o);
+    Font(const Font& o);
+
+    std::shared_ptr<MinikinFont> typeface;
+    FontStyle style;
+
+    std::unordered_set<AxisTag> getSupportedAxesLocked() const;
+};
+
+struct FontVariation {
+    FontVariation(AxisTag axisTag, float value) : axisTag(axisTag), value(value) {}
+    AxisTag axisTag;
+    float value;
+};
+
+class FontFamily {
 public:
-    FontFamily();
+    explicit FontFamily(std::vector<Font>&& fonts);
+    FontFamily(int variant, std::vector<Font>&& fonts);
+    FontFamily(uint32_t langId, int variant, std::vector<Font>&& fonts);
 
-    FontFamily(int variant);
-
-    FontFamily(uint32_t langId, int variant)
-        : mLangId(langId),
-        mVariant(variant),
-        mHasVSTable(false),
-        mCoverageValid(false) {
-    }
-
-    ~FontFamily();
-
-    // Add font to family, extracting style information from the font
-    bool addFont(MinikinFont* typeface);
-
-    void addFont(MinikinFont* typeface, FontStyle style);
+    // TODO: Good to expose FontUtil.h.
+    static bool analyzeStyle(const std::shared_ptr<MinikinFont>& typeface, int* weight,
+            bool* italic);
     FakedFont getClosestMatch(FontStyle style) const;
 
     uint32_t langId() const { return mLangId; }
     int variant() const { return mVariant; }
 
     // API's for enumerating the fonts in a family. These don't guarantee any particular order
-    size_t getNumFonts() const;
-    MinikinFont* getFont(size_t index) const;
-    FontStyle getStyle(size_t index) const;
+    size_t getNumFonts() const { return mFonts.size(); }
+    const std::shared_ptr<MinikinFont>& getFont(size_t index) const {
+        return mFonts[index].typeface;
+    }
+    FontStyle getStyle(size_t index) const { return mFonts[index].style; }
     bool isColorEmojiFamily() const;
+    const std::unordered_set<AxisTag>& supportedAxes() const { return mSupportedAxes; }
 
-    // Get Unicode coverage. Lifetime of returned bitset is same as receiver. May return nullptr on
-    // error.
-    const SparseBitSet* getCoverage();
+    // Get Unicode coverage.
+    const SparseBitSet& getCoverage() const { return mCoverage; }
 
     // Returns true if the font has a glyph for the code point and variation selector pair.
     // Caller should acquire a lock before calling the method.
-    bool hasGlyph(uint32_t codepoint, uint32_t variationSelector);
+    bool hasGlyph(uint32_t codepoint, uint32_t variationSelector) const;
 
     // Returns true if this font family has a variaion sequence table (cmap format 14 subtable).
-    bool hasVSTable() const;
+    bool hasVSTable() const { return !mCmapFmt14Coverage.empty(); }
+
+    // Creates new FontFamily based on this family while applying font variations. Returns nullptr
+    // if none of variations apply to this family.
+    std::shared_ptr<FontFamily> createFamilyWithVariation(
+            const std::vector<FontVariation>& variations) const;
 
 private:
-    void addFontLocked(MinikinFont* typeface, FontStyle style);
+    void computeCoverage();
 
-    class Font {
-    public:
-        Font(MinikinFont* typeface, FontStyle style) :
-            typeface(typeface), style(style) { }
-        MinikinFont* typeface;
-        FontStyle style;
-    };
     uint32_t mLangId;
     int mVariant;
     std::vector<Font> mFonts;
+    std::unordered_set<AxisTag> mSupportedAxes;
 
     SparseBitSet mCoverage;
-    bool mHasVSTable;
-    bool mCoverageValid;
+    std::vector<std::unique_ptr<SparseBitSet>> mCmapFmt14Coverage;
+
+    // Forbid copying and assignment.
+    FontFamily(const FontFamily&) = delete;
+    void operator=(const FontFamily&) = delete;
 };
 
-}  // namespace android
+}  // namespace minikin
 
 #endif  // MINIKIN_FONT_FAMILY_H

diff --git a/include/minikin/GraphemeBreak.h b/include/minikin/GraphemeBreak.h
index 3120101..f1b5102 100644
--- a/include/minikin/GraphemeBreak.h
+++ b/include/minikin/GraphemeBreak.h

@@ -17,7 +17,7 @@
 #ifndef MINIKIN_GRAPHEME_BREAK_H
 #define MINIKIN_GRAPHEME_BREAK_H
 
-namespace android {
+namespace minikin {
 
 class GraphemeBreak {
 public:
@@ -31,17 +31,17 @@
     };
 
     // Determine whether the given offset is a grapheme break.
-    // This implementation generally follows Unicode TR29 extended
-    // grapheme break, but with some tweaks to more closely match
-    // existing implementations.
-    static bool isGraphemeBreak(const uint16_t* buf, size_t start, size_t count, size_t offset);
+    // This implementation generally follows Unicode's UTR #29 extended
+    // grapheme break, with various tweaks.
+    static bool isGraphemeBreak(const float* advances, const uint16_t* buf, size_t start,
+            size_t count, size_t offset);
 
     // Matches Android's Java API. Note, return (size_t)-1 for AT to
     // signal non-break because unsigned return type.
-    static size_t getTextRunCursor(const uint16_t* buf, size_t start, size_t count,
-            size_t offset, MoveOpt opt);
+    static size_t getTextRunCursor(const float* advances, const uint16_t* buf, size_t start,
+            size_t count, size_t offset, MoveOpt opt);
 };
 
-}  // namespace android
+}  // namespace minikin
 
-#endif  // MINIKIN_GRAPHEME_BREAK_H
\ No newline at end of file
+#endif  // MINIKIN_GRAPHEME_BREAK_H

diff --git a/include/minikin/Hyphenator.h b/include/minikin/Hyphenator.h
index 9605205..2b8ccb7 100644
--- a/include/minikin/Hyphenator.h
+++ b/include/minikin/Hyphenator.h

@@ -18,51 +18,140 @@
  * An implementation of Liang's hyphenation algorithm.
  */
 
+#include "unicode/locid.h"
 #include <memory>
 #include <unordered_map>
 
 #ifndef MINIKIN_HYPHENATOR_H
 #define MINIKIN_HYPHENATOR_H
 
-namespace android {
+namespace minikin {
+
+enum class HyphenationType : uint8_t {
+    // Note: There are implicit assumptions scattered in the code that DONT_BREAK is 0.
+
+    // Do not break.
+    DONT_BREAK = 0,
+    // Break the line and insert a normal hyphen.
+    BREAK_AND_INSERT_HYPHEN = 1,
+    // Break the line and insert an Armenian hyphen (U+058A).
+    BREAK_AND_INSERT_ARMENIAN_HYPHEN = 2,
+    // Break the line and insert a maqaf (Hebrew hyphen, U+05BE).
+    BREAK_AND_INSERT_MAQAF = 3,
+    // Break the line and insert a Canadian Syllabics hyphen (U+1400).
+    BREAK_AND_INSERT_UCAS_HYPHEN = 4,
+    // Break the line, but don't insert a hyphen. Used for cases when there is already a hyphen
+    // present or the script does not use a hyphen (e.g. in Malayalam).
+    BREAK_AND_DONT_INSERT_HYPHEN = 5,
+    // Break and replace the last code unit with hyphen. Used for Catalan "l·l" which hyphenates
+    // as "l-/l".
+    BREAK_AND_REPLACE_WITH_HYPHEN = 6,
+    // Break the line, and repeat the hyphen (which is the last character) at the beginning of the
+    // next line. Used in Polish, where "czerwono-niebieska" should hyphenate as
+    // "czerwono-/-niebieska".
+    BREAK_AND_INSERT_HYPHEN_AT_NEXT_LINE = 7,
+    // Break the line, insert a ZWJ and hyphen at the first line, and a ZWJ at the second line.
+    // This is used in Arabic script, mostly for writing systems of Central Asia. It's our default
+    // behavior when a soft hyphen is used in Arabic script.
+    BREAK_AND_INSERT_HYPHEN_AND_ZWJ = 8
+};
+
+// The hyphen edit represents an edit to the string when a word is
+// hyphenated. The most common hyphen edit is adding a "-" at the end
+// of a syllable, but nonstandard hyphenation allows for more choices.
+// Note that a HyphenEdit can hold two types of edits at the same time,
+// one at the beginning of the string/line and one at the end.
+class HyphenEdit {
+public:
+    static const uint32_t NO_EDIT = 0x00;
+
+    static const uint32_t INSERT_HYPHEN_AT_END = 0x01;
+    static const uint32_t INSERT_ARMENIAN_HYPHEN_AT_END = 0x02;
+    static const uint32_t INSERT_MAQAF_AT_END = 0x03;
+    static const uint32_t INSERT_UCAS_HYPHEN_AT_END = 0x04;
+    static const uint32_t INSERT_ZWJ_AND_HYPHEN_AT_END = 0x05;
+    static const uint32_t REPLACE_WITH_HYPHEN_AT_END = 0x06;
+    static const uint32_t BREAK_AT_END = 0x07;
+
+    static const uint32_t INSERT_HYPHEN_AT_START = 0x01 << 3;
+    static const uint32_t INSERT_ZWJ_AT_START = 0x02 << 3;
+    static const uint32_t BREAK_AT_START = 0x03 << 3;
+
+    // Keep in sync with the definitions in the Java code at:
+    // frameworks/base/graphics/java/android/graphics/Paint.java
+    static const uint32_t MASK_END_OF_LINE = 0x07;
+    static const uint32_t MASK_START_OF_LINE = 0x03 << 3;
+
+    inline static bool isReplacement(uint32_t hyph) {
+        return hyph == REPLACE_WITH_HYPHEN_AT_END;
+    }
+
+    inline static bool isInsertion(uint32_t hyph) {
+        return (hyph == INSERT_HYPHEN_AT_END
+                || hyph == INSERT_ARMENIAN_HYPHEN_AT_END
+                || hyph == INSERT_MAQAF_AT_END
+                || hyph == INSERT_UCAS_HYPHEN_AT_END
+                || hyph == INSERT_ZWJ_AND_HYPHEN_AT_END
+                || hyph == INSERT_HYPHEN_AT_START
+                || hyph == INSERT_ZWJ_AT_START);
+    }
+
+    const static uint32_t* getHyphenString(uint32_t hyph);
+    static uint32_t editForThisLine(HyphenationType type);
+    static uint32_t editForNextLine(HyphenationType type);
+
+    HyphenEdit() : hyphen(NO_EDIT) { }
+    HyphenEdit(uint32_t hyphenInt) : hyphen(hyphenInt) { }  // NOLINT(implicit)
+    uint32_t getHyphen() const { return hyphen; }
+    bool operator==(const HyphenEdit &other) const { return hyphen == other.hyphen; }
+
+    uint32_t getEnd() const { return hyphen & MASK_END_OF_LINE; }
+    uint32_t getStart() const { return hyphen & MASK_START_OF_LINE; }
+
+private:
+    uint32_t hyphen;
+};
 
 // hyb file header; implementation details are in the .cpp file
 struct Header;
 
 class Hyphenator {
 public:
-    // Note: this will also require a locale, for proper case folding behavior
-    static Hyphenator* load(const uint16_t* patternData, size_t size);
+    // Compute the hyphenation of a word, storing the hyphenation in result vector. Each entry in
+    // the vector is a "hyphenation type" for a potential hyphenation that can be applied at the
+    // corresponding code unit offset in the word.
+    //
+    // Example: word is "hyphen", result is the following, corresponding to "hy-phen":
+    // [DONT_BREAK, DONT_BREAK, BREAK_AND_INSERT_HYPHEN, DONT_BREAK, DONT_BREAK, DONT_BREAK]
+    void hyphenate(std::vector<HyphenationType>* result, const uint16_t* word, size_t len,
+            const icu::Locale& locale);
 
-    // Compute the hyphenation of a word, storing the hyphenation in result vector. Each
-    // entry in the vector is a "hyphen edit" to be applied at the corresponding code unit
-    // offset in the word. Currently 0 means no hyphen and 1 means insert hyphen and break,
-    // but this will be expanded to other edits for nonstandard hyphenation.
-    // Example: word is "hyphen", result is [0 0 1 0 0 0], corresponding to "hy-phen".
-    void hyphenate(std::vector<uint8_t>* result, const uint16_t* word, size_t len);
+    // Returns true if the codepoint is like U+2010 HYPHEN in line breaking and usage: a character
+    // immediately after which line breaks are allowed, but words containing it should not be
+    // automatically hyphenated.
+    static bool isLineBreakingHyphen(uint32_t cp);
 
     // pattern data is in binary format, as described in doc/hyb_file_format.md. Note:
     // the caller is responsible for ensuring that the lifetime of the pattern data is
     // at least as long as the Hyphenator object.
 
-    // Note: nullptr is valid input, in which case the hyphenator only processes soft hyphens
-    static Hyphenator* loadBinary(const uint8_t* patternData);
+    // Note: nullptr is valid input, in which case the hyphenator only processes soft hyphens.
+    static Hyphenator* loadBinary(const uint8_t* patternData, size_t minPrefix, size_t minSuffix);
 
 private:
-    // apply soft hyphens only, ignoring patterns
-    void hyphenateSoft(uint8_t* result, const uint16_t* word, size_t len);
+    // apply various hyphenation rules including hard and soft hyphens, ignoring patterns
+    void hyphenateWithNoPatterns(HyphenationType* result, const uint16_t* word, size_t len,
+            const icu::Locale& locale);
 
-    // try looking up word in alphabet table, return false if any code units fail to map
-    // Note that this methor writes len+2 entries into alpha_codes (including start and stop)
-    bool alphabetLookup(uint16_t* alpha_codes, const uint16_t* word, size_t len);
+    // Try looking up word in alphabet table, return DONT_BREAK if any code units fail to map.
+    // Otherwise, returns BREAK_AND_INSERT_HYPHEN, BREAK_AND_INSERT_ARMENIAN_HYPHEN, or
+    // BREAK_AND_DONT_INSERT_HYPHEN based on the script of the characters seen.
+    // Note that this method writes len+2 entries into alpha_codes (including start and stop)
+    HyphenationType alphabetLookup(uint16_t* alpha_codes, const uint16_t* word, size_t len);
 
     // calculate hyphenation from patterns, assuming alphabet lookup has already been done
-    void hyphenateFromCodes(uint8_t* result, const uint16_t* codes, size_t len);
-
-    // TODO: these should become parameters, as they might vary by locale, screen size, and
-    // possibly explicit user control.
-    static const int MIN_PREFIX = 2;
-    static const int MIN_SUFFIX = 3;
+    void hyphenateFromCodes(HyphenationType* result, const uint16_t* codes, size_t len,
+            HyphenationType hyphenValue);
 
     // See also LONGEST_HYPHENATED_WORD in LineBreaker.cpp. Here the constant is used so
     // that temporary buffers can be stack-allocated without waste, which is a slightly
@@ -70,6 +159,7 @@
     static const size_t MAX_HYPHENATED_SIZE = 64;
 
     const uint8_t* patternData;
+    size_t minPrefix, minSuffix;
 
     // accessors for binary data
     const Header* getHeader() const {
@@ -78,6 +168,6 @@
 
 };
 
-}  // namespace android
+}  // namespace minikin
 
 #endif   // MINIKIN_HYPHENATOR_H

diff --git a/include/minikin/Layout.h b/include/minikin/Layout.h
index d9bb01f..6d1de2f 100644
--- a/include/minikin/Layout.h
+++ b/include/minikin/Layout.h

@@ -19,32 +19,13 @@
 
 #include <hb.h>
 
+#include <memory>
 #include <vector>
 
 #include <minikin/FontCollection.h>
-#include <minikin/MinikinFontFreeType.h>
 
 namespace minikin {
 
-// The Bitmap class is for debugging. We'll probably move it out
-// of here into a separate lightweight software rendering module
-// (optional, as we'd hope most clients would do their own)
-class Bitmap {
-public:
-    Bitmap(int width, int height);
-    ~Bitmap();
-    void writePnm(std::ofstream& o) const;
-    void drawGlyph(const android::GlyphBitmap& bitmap, int x, int y);
-private:
-    int width;
-    int height;
-    uint8_t* buf;
-};
-
-} // namespace minikin
-
-namespace android {
-
 struct LayoutGlyph {
     // index into mFaces and mHbFonts vectors. We could imagine
     // moving this into a run length representation, because it's
@@ -75,37 +56,32 @@
 // Lifecycle and threading assumptions for Layout:
 // The object is assumed to be owned by a single thread; multiple threads
 // may not mutate it at the same time.
-// The lifetime of the FontCollection set through setFontCollection must
-// extend through the lifetime of the Layout object.
 class Layout {
 public:
 
-    Layout() : mGlyphs(), mAdvances(), mCollection(0), mFaces(), mAdvance(0), mBounds() {
+    Layout() : mGlyphs(), mAdvances(), mFaces(), mAdvance(0), mBounds() {
         mBounds.setEmpty();
     }
 
-    // Clears layout, ready to be used again
-    void reset();
+    Layout(Layout&& layout) = default;
+
+    // Forbid copying and assignment.
+    Layout(const Layout&) = delete;
+    void operator=(const Layout&) = delete;
 
     void dump() const;
-    void setFontCollection(const FontCollection* collection);
 
     void doLayout(const uint16_t* buf, size_t start, size_t count, size_t bufSize,
-        int bidiFlags, const FontStyle &style, const MinikinPaint &paint);
+        int bidiFlags, const FontStyle &style, const MinikinPaint &paint,
+        const std::shared_ptr<FontCollection>& collection);
 
     static float measureText(const uint16_t* buf, size_t start, size_t count, size_t bufSize,
         int bidiFlags, const FontStyle &style, const MinikinPaint &paint,
-        const FontCollection* collection, float* advances);
-
-    void draw(minikin::Bitmap*, int x0, int y0, float size) const;
-
-    // Deprecated. Nont needed. Remove when callers are removed.
-    static void init();
+        const std::shared_ptr<FontCollection>& collection, float* advances);
 
     // public accessors
     size_t nGlyphs() const;
-    // Does not bump reference; ownership is still layout
-    MinikinFont *getFont(int i) const;
+    const MinikinFont* getFont(int i) const;
     FontFakery getFakery(int i) const;
     unsigned int getGlyphId(int i) const;
     float getX(int i) const;
@@ -121,7 +97,7 @@
     // start and count are the parameters to doLayout
     float getCharAdvance(size_t i) const { return mAdvances[i]; }
 
-    void getBounds(MinikinRect* rect);
+    void getBounds(MinikinRect* rect) const;
 
     // Purge all caches, useful in low memory conditions
     static void purgeCaches();
@@ -130,36 +106,38 @@
     friend class LayoutCacheKey;
 
     // Find a face in the mFaces vector, or create a new entry
-    int findFace(FakedFont face, LayoutContext* ctx);
+    int findFace(const FakedFont& face, LayoutContext* ctx);
+
+    // Clears layout, ready to be used again
+    void reset();
 
     // Lay out a single bidi run
     // When layout is not null, layout info will be stored in the object.
     // When advances is not null, measurement results will be stored in the array.
     static float doLayoutRunCached(const uint16_t* buf, size_t runStart, size_t runLength,
         size_t bufSize, bool isRtl, LayoutContext* ctx, size_t dstStart,
-        const FontCollection* collection, Layout* layout, float* advances);
+        const std::shared_ptr<FontCollection>& collection, Layout* layout, float* advances);
 
     // Lay out a single word
     static float doLayoutWord(const uint16_t* buf, size_t start, size_t count, size_t bufSize,
-        bool isRtl, LayoutContext* ctx, size_t bufStart, const FontCollection* collection,
-        Layout* layout, float* advances);
+        bool isRtl, LayoutContext* ctx, size_t bufStart,
+        const std::shared_ptr<FontCollection>& collection, Layout* layout, float* advances);
 
     // Lay out a single bidi run
     void doLayoutRun(const uint16_t* buf, size_t start, size_t count, size_t bufSize,
-        bool isRtl, LayoutContext* ctx);
+        bool isRtl, LayoutContext* ctx, const std::shared_ptr<FontCollection>& collection);
 
     // Append another layout (for example, cached value) into this one
-    void appendLayout(Layout* src, size_t start);
+    void appendLayout(Layout* src, size_t start, float extraAdvance);
 
     std::vector<LayoutGlyph> mGlyphs;
     std::vector<float> mAdvances;
 
-    const FontCollection* mCollection;
     std::vector<FakedFont> mFaces;
     float mAdvance;
     MinikinRect mBounds;
 };
 
-}  // namespace android
+}  // namespace minikin
 
 #endif  // MINIKIN_LAYOUT_H

diff --git a/include/minikin/LineBreaker.h b/include/minikin/LineBreaker.h
index 1d81404..c91c0b3 100644
--- a/include/minikin/LineBreaker.h
+++ b/include/minikin/LineBreaker.h

@@ -29,7 +29,7 @@
 #include "minikin/Hyphenator.h"
 #include "minikin/WordBreaker.h"
 
-namespace android {
+namespace minikin {
 
 enum BreakStrategy {
     kBreakStrategy_Greedy = 0,
@@ -147,6 +147,8 @@
 
         void setStrategy(BreakStrategy strategy) { mStrategy = strategy; }
 
+        void setJustified(bool justified) { mJustified = justified; }
+
         HyphenationFrequency getHyphenationFrequency() const { return mHyphenationFrequency; }
 
         void setHyphenationFrequency(HyphenationFrequency frequency) {
@@ -157,8 +159,8 @@
         // Minikin to do the shaping of the strings. The main thing that would need to be changed
         // is having some kind of callback (or virtual class, or maybe even template), which could
         // easily be instantiated with Minikin's Layout. Future work for when needed.
-        float addStyleRun(MinikinPaint* paint, const FontCollection* typeface, FontStyle style,
-                size_t start, size_t end, bool isRtl);
+        float addStyleRun(MinikinPaint* paint, const std::shared_ptr<FontCollection>& typeface,
+                FontStyle style, size_t start, size_t end, bool isRtl);
 
         void addReplacement(size_t start, size_t end, float width);
 
@@ -189,23 +191,28 @@
         struct Candidate {
             size_t offset;  // offset to text buffer, in code units
             size_t prev;  // index to previous break
-            ParaWidth preBreak;
-            ParaWidth postBreak;
+            ParaWidth preBreak;  // width of text until this point, if we decide to not break here
+            ParaWidth postBreak;  // width of text until this point, if we decide to break here
             float penalty;  // penalty of this break (for example, hyphen penalty)
             float score;  // best score found for this break
             size_t lineNumber;  // only updated for non-constant line widths
-            uint8_t hyphenEdit;
+            size_t preSpaceCount;  // preceding space count before breaking
+            size_t postSpaceCount;  // preceding space count after breaking
+            HyphenationType hyphenType;
         };
 
         float currentLineWidth() const;
 
-        void addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak, float penalty,
-                uint8_t hyph);
+        void addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak,
+                size_t preSpaceCount, size_t postSpaceCount, float penalty, HyphenationType hyph);
 
         void addCandidate(Candidate cand);
+        void pushGreedyBreak();
 
         // push an actual break to the output. Takes care of setting flags for tab
-        void pushBreak(int offset, float width, uint8_t hyph);
+        void pushBreak(int offset, float width, uint8_t hyphenEdit);
+
+        float getSpaceWidth() const;
 
         void computeBreaksGreedy();
 
@@ -214,15 +221,17 @@
         void finishBreaksOptimal();
 
         WordBreaker mWordBreaker;
+        icu::Locale mLocale;
         std::vector<uint16_t>mTextBuf;
         std::vector<float>mCharWidths;
 
         Hyphenator* mHyphenator;
-        std::vector<uint8_t> mHyphBuf;
+        std::vector<HyphenationType> mHyphBuf;
 
         // layout parameters
         BreakStrategy mStrategy = kBreakStrategy_Greedy;
         HyphenationFrequency mHyphenationFrequency = kHyphenationFrequency_Normal;
+        bool mJustified;
         LineWidths mLineWidths;
         TabStops mTabStops;
 
@@ -240,9 +249,11 @@
         size_t mBestBreak;
         float mBestScore;
         ParaWidth mPreBreak;  // prebreak of last break
+        uint32_t mLastHyphenation;  // hyphen edit of last break kept for next line
         int mFirstTabIndex;
+        size_t mSpaceCount;
 };
 
-}  // namespace android
+}  // namespace minikin
 
 #endif  // MINIKIN_LINE_BREAKER_H

diff --git a/include/minikin/Measurement.h b/include/minikin/Measurement.h
index 7bcab66..b00c212 100644
--- a/include/minikin/Measurement.h
+++ b/include/minikin/Measurement.h

@@ -19,7 +19,7 @@
 
 #include <minikin/Layout.h>
 
-namespace android {
+namespace minikin {
 
 float getRunAdvance(const float* advances, const uint16_t* buf, size_t start, size_t count,
         size_t offset);
@@ -27,6 +27,6 @@
 size_t getOffsetForAdvance(const float* advances, const uint16_t* buf, size_t start, size_t count,
         float advance);
 
-}
+}  // namespace minikin
 
 #endif  // MINIKIN_MEASUREMENT_H

diff --git a/include/minikin/MinikinFont.h b/include/minikin/MinikinFont.h
index 4951514..01af786 100644
--- a/include/minikin/MinikinFont.h
+++ b/include/minikin/MinikinFont.h

@@ -18,35 +18,23 @@
 #define MINIKIN_FONT_H
 
 #include <string>
+#include <memory>
 
-#include <minikin/MinikinRefCounted.h>
 #include <minikin/FontFamily.h>
+#include <minikin/Hyphenator.h>
 
 // An abstraction for platform fonts, allowing Minikin to be used with
 // multiple actual implementations of fonts.
 
-namespace android {
-
-// The hyphen edit represents an edit to the string when a word is
-// hyphenated. The most common hyphen edit is adding a "-" at the end
-// of a syllable, but nonstandard hyphenation allows for more choices.
-class HyphenEdit {
-public:
-    HyphenEdit() : hyphen(0) { }
-    HyphenEdit(uint32_t hyphenInt) : hyphen(hyphenInt) { }
-    bool hasHyphen() const { return hyphen != 0; }
-    bool operator==(const HyphenEdit &other) const { return hyphen == other.hyphen; }
-private:
-    uint32_t hyphen;
-};
+namespace minikin {
 
 class MinikinFont;
 
 // Possibly move into own .h file?
 // Note: if you add a field here, either add it to LayoutCacheKey or to skipCache()
 struct MinikinPaint {
-    MinikinPaint() : font(0), size(0), scaleX(0), skewX(0), letterSpacing(0), paintFlags(0),
-            fakery(), fontFeatureSettings() { }
+    MinikinPaint() : font(nullptr), size(0), scaleX(0), skewX(0), letterSpacing(0), wordSpacing(0),
+            paintFlags(0), fakery(), hyphenEdit(), fontFeatureSettings() { }
 
     bool skipCache() const {
         return !fontFeatureSettings.empty();
@@ -57,6 +45,7 @@
     float scaleX;
     float skewX;
     float letterSpacing;
+    float wordSpacing;
     uint32_t paintFlags;
     FontFakery fakery;
     HyphenEdit hyphenEdit;
@@ -92,14 +81,12 @@
     void join(const MinikinRect& r);
 };
 
-class MinikinFontFreeType;
-
 // Callback for freeing data
 typedef void (*MinikinDestroyFunc) (void* data);
 
-class MinikinFont : public MinikinRefCounted {
+class MinikinFont {
 public:
-    MinikinFont(int32_t uniqueId) : mUniqueId(uniqueId) {}
+    explicit MinikinFont(int32_t uniqueId) : mUniqueId(uniqueId) {}
 
     virtual ~MinikinFont();
 
@@ -109,8 +96,6 @@
     virtual void GetBounds(MinikinRect* bounds, uint32_t glyph_id,
         const MinikinPaint &paint) const = 0;
 
-    virtual const void* GetTable(uint32_t tag, size_t* size, MinikinDestroyFunc* destroy) = 0;
-
     // Override if font can provide access to raw data
     virtual const void* GetFontData() const {
         return nullptr;
@@ -127,6 +112,13 @@
         return 0;
     }
 
+    virtual const std::vector<minikin::FontVariation>& GetAxes() const = 0;
+
+    virtual std::shared_ptr<MinikinFont> createFontWithVariation(
+            const std::vector<FontVariation>&) const {
+        return nullptr;
+    }
+
     static uint32_t MakeTag(char c1, char c2, char c3, char c4) {
         return ((uint32_t)c1 << 24) | ((uint32_t)c2 << 16) |
             ((uint32_t)c3 << 8) | (uint32_t)c4;
@@ -137,6 +129,6 @@
     const int32_t mUniqueId;
 };
 
-}  // namespace android
+}  // namespace minikin
 
 #endif  // MINIKIN_FONT_H

diff --git a/include/minikin/MinikinFontFreeType.h b/include/minikin/MinikinFontFreeType.h
deleted file mode 100644
index baa08df..0000000
--- a/include/minikin/MinikinFontFreeType.h
+++ /dev/null

@@ -1,70 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef MINIKIN_FONT_FREETYPE_H
-#define MINIKIN_FONT_FREETYPE_H
-
-#include <ft2build.h>
-#include FT_FREETYPE_H
-#include FT_TRUETYPE_TABLES_H
-
-#include <minikin/MinikinFont.h>
-
-// An abstraction for platform fonts, allowing Minikin to be used with
-// multiple actual implementations of fonts.
-
-namespace android {
-
-struct GlyphBitmap {
-    uint8_t *buffer;
-    int width;
-    int height;
-    int left;
-    int top;
-};
-
-class MinikinFontFreeType : public MinikinFont {
-public:
-    explicit MinikinFontFreeType(FT_Face typeface);
-
-    ~MinikinFontFreeType();
-
-    float GetHorizontalAdvance(uint32_t glyph_id,
-        const MinikinPaint &paint) const;
-
-    void GetBounds(MinikinRect* bounds, uint32_t glyph_id,
-        const MinikinPaint& paint) const;
-
-    const void* GetTable(uint32_t tag, size_t* size, MinikinDestroyFunc* destroy);
-
-    // TODO: provide access to raw data, as an optimization.
-
-    // Not a virtual method, as the protocol to access rendered
-    // glyph bitmaps is probably different depending on the
-    // backend.
-    bool Render(uint32_t glyph_id,
-        const MinikinPaint &paint, GlyphBitmap *result);
-
-    MinikinFontFreeType* GetFreeType();
-
-private:
-    FT_Face mTypeface;
-    static int32_t sIdCounter;
-};
-
-}  // namespace android
-
-#endif  // MINIKIN_FONT_FREETYPE_H

diff --git a/include/minikin/MinikinRefCounted.h b/include/minikin/MinikinRefCounted.h
deleted file mode 100644
index 603aff0..0000000
--- a/include/minikin/MinikinRefCounted.h
+++ /dev/null

@@ -1,59 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// Base class for reference counted objects in Minikin
-
-#ifndef MINIKIN_REF_COUNTED_H
-#define MINIKIN_REF_COUNTED_H
-
-namespace android {
-
-class MinikinRefCounted {
-public:
-    void RefLocked() { mRefcount_++; }
-    void UnrefLocked() { if (--mRefcount_ == 0) { delete this; } }
-
-    // These refcount operations take the global lock.
-    void Ref();
-    void Unref();
-
-    MinikinRefCounted() : mRefcount_(1) { }
-
-    virtual ~MinikinRefCounted() { };
-private:
-    int mRefcount_;
-};
-
-// An RAII container for reference counted objects.
-// Note: this is only suitable for clients which are _not_ holding the global lock.
-template <typename T>
-class MinikinAutoUnref {
-public:
-    MinikinAutoUnref(T* obj) : mObj(obj) {
-    }
-    ~MinikinAutoUnref() {
-        mObj->Unref();
-    }
-    T& operator*() const { return *mObj; }
-    T* operator->() const { return mObj; }
-    T* get() const { return mObj; }
-private:
-    T* mObj;
-};
-
-}
-
-#endif   // MINIKIN_REF_COUNTED_H
\ No newline at end of file

diff --git a/include/minikin/SparseBitSet.h b/include/minikin/SparseBitSet.h
index 72b8305..62aece2 100644
--- a/include/minikin/SparseBitSet.h
+++ b/include/minikin/SparseBitSet.h

@@ -19,35 +19,37 @@
 
 #include <stdint.h>
 #include <sys/types.h>
-#include <UniquePtr.h>
+
+#include <memory>
 
 // ---------------------------------------------------------------------------
 
-namespace android {
+namespace minikin {
 
 // This is an implementation of a set of integers. It is optimized for
 // values that are somewhat sparse, in the ballpark of a maximum value
 // of thousands to millions. It is particularly efficient when there are
 // large gaps. The motivating example is Unicode coverage of a font, but
 // the abstraction itself is fully general.
-
 class SparseBitSet {
 public:
-    SparseBitSet(): mMaxVal(0) {
-    }
-
-    // Clear the set
-    void clear();
+    // Create an empty bit set.
+    SparseBitSet() : mMaxVal(0) {}
 
     // Initialize the set to a new value, represented by ranges. For
     // simplicity, these ranges are arranged as pairs of values,
     // inclusive of start, exclusive of end, laid out in a uint32 array.
-    void initFromRanges(const uint32_t* ranges, size_t nRanges);
+    SparseBitSet(const uint32_t* ranges, size_t nRanges) : SparseBitSet() {
+        initFromRanges(ranges, nRanges);
+    }
+
+    SparseBitSet(SparseBitSet&&) = default;
+    SparseBitSet& operator=(SparseBitSet&&) = default;
 
     // Determine whether the value is included in the set
     bool get(uint32_t ch) const {
         if (ch >= mMaxVal) return false;
-        uint32_t *bitmap = &mBitmaps[mIndices[ch >> kLogValuesPerPage]];
+        const uint32_t *bitmap = &mBitmaps[mIndices[ch >> kLogValuesPerPage]];
         uint32_t index = ch & kPageMask;
         return (bitmap[index >> kLogBitsPerEl] & (kElFirst >> (index & kElMask))) != 0;
     }
@@ -64,6 +66,9 @@
     static const uint32_t kNotFound = ~0u;
 
 private:
+    void initFromRanges(const uint32_t* ranges, size_t nRanges);
+
+    static const uint32_t kMaximumCapacity = 0xFFFFFF;
     static const int kLogValuesPerPage = 8;
     static const int kPageMask = (1 << kLogValuesPerPage) - 1;
     static const int kLogBytesPerEl = 2;
@@ -73,20 +78,22 @@
     typedef uint32_t element;
     static const element kElAllOnes = ~((element)0);
     static const element kElFirst = ((element)1) << kElMask;
-    static const uint32_t noZeroPage = ~0u;
+    static const uint16_t noZeroPage = 0xFFFF;
 
     static uint32_t calcNumPages(const uint32_t* ranges, size_t nRanges);
     static int CountLeadingZeros(element x);
 
     uint32_t mMaxVal;
-    UniquePtr<uint32_t[]> mIndices;
-    UniquePtr<element[]> mBitmaps;
-    uint32_t mZeroPageIndex;
+
+    std::unique_ptr<uint16_t[]> mIndices;
+    std::unique_ptr<element[]> mBitmaps;
+    uint16_t mZeroPageIndex;
+
+    // Forbid copy and assign.
+    SparseBitSet(const SparseBitSet&) = delete;
+    void operator=(const SparseBitSet&) = delete;
 };
 
-// Note: this thing cannot be used in vectors yet. If that were important, we'd need to
-// make the copy constructor work, and probably set up move traits as well.
-
-}; // namespace android
+}  // namespace minikin
 
 #endif // MINIKIN_SPARSE_BIT_SET_H

diff --git a/include/minikin/WordBreaker.h b/include/minikin/WordBreaker.h
index 4eff9d1..6971ce2 100644
--- a/include/minikin/WordBreaker.h
+++ b/include/minikin/WordBreaker.h

@@ -26,7 +26,7 @@
 #include "unicode/brkiter.h"
 #include <memory>
 
-namespace android {
+namespace minikin {
 
 class WordBreaker {
 public:
@@ -55,6 +55,10 @@
     void finish();
 
 private:
+    int32_t iteratorNext();
+    void detectEmailOrUrl();
+    ssize_t findNextBreakInEmailOrUrl();
+
     std::unique_ptr<icu::BreakIterator> mBreakIterator;
     UText mUText = UTEXT_INITIALIZER;
     const uint16_t* mText = nullptr;
@@ -68,6 +72,6 @@
     bool mInEmailOrUrl;
 };
 
-}  // namespace
+}  // namespace minikin
 
 #endif  // MINIKIN_WORD_BREAKER_H

diff --git a/libs/minikin/AnalyzeStyle.cpp b/libs/minikin/AnalyzeStyle.cpp
deleted file mode 100644
index 0961645..0000000
--- a/libs/minikin/AnalyzeStyle.cpp
+++ /dev/null

@@ -1,43 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <stdlib.h>
-#include <stdint.h>
-
-#include <minikin/AnalyzeStyle.h>
-
-namespace android {
-
-// should  we have a single FontAnalyzer class this stuff lives in, to avoid dup?
-static int32_t readU16(const uint8_t* data, size_t offset) {
-    return data[offset] << 8 | data[offset + 1];
-}
-
-bool analyzeStyle(const uint8_t* os2_data, size_t os2_size, int* weight, bool* italic) {
-    const size_t kUsWeightClassOffset = 4;
-    const size_t kFsSelectionOffset = 62;
-    const uint16_t kItalicFlag = (1 << 0);
-    if (os2_size < kFsSelectionOffset + 2) {
-        return false;
-    }
-    uint16_t weightClass = readU16(os2_data, kUsWeightClassOffset);
-    *weight = weightClass / 100;
-    uint16_t fsSelection = readU16(os2_data, kFsSelectionOffset);
-    *italic = (fsSelection & kItalicFlag) != 0;
-    return true;
-}
-
-}  // namespace android

diff --git a/libs/minikin/Android.mk b/libs/minikin/Android.mk
index 9d82579..bb6234a 100644
--- a/libs/minikin/Android.mk
+++ b/libs/minikin/Android.mk

@@ -15,27 +15,16 @@
 LOCAL_PATH := $(call my-dir)
 
 include $(CLEAR_VARS)
-# Generate unicode emoji data from UCD.
-UNICODE_EMOJI_H_GEN_PY := $(LOCAL_PATH)/unicode_emoji_h_gen.py
-UNICODE_EMOJI_DATA := $(TOP)/external/unicode/emoji-data.txt
-
-UNICODE_EMOJI_H := $(intermediates)/generated/UnicodeData.h
-$(UNICODE_EMOJI_H): $(UNICODE_EMOJI_H_GEN_PY) $(UNICODE_EMOJI_DATA)
-$(LOCAL_PATH)/MinikinInternal.cpp: $(UNICODE_EMOJI_H)
-$(UNICODE_EMOJI_H): PRIVATE_CUSTOM_TOOL := python $(UNICODE_EMOJI_H_GEN_PY) \
-    -i $(UNICODE_EMOJI_DATA) \
-    -o $(UNICODE_EMOJI_H)
-$(UNICODE_EMOJI_H):
-		$(transform-generated-source)
 
 include $(CLEAR_VARS)
 minikin_src_files := \
-    AnalyzeStyle.cpp \
     CmapCoverage.cpp \
+    Emoji.cpp \
     FontCollection.cpp \
     FontFamily.cpp \
     FontLanguage.cpp \
     FontLanguageListCache.cpp \
+    FontUtils.cpp \
     GraphemeBreak.cpp \
     HbFontCache.cpp \
     Hyphenator.cpp \
@@ -44,15 +33,12 @@
     LineBreaker.cpp \
     Measurement.cpp \
     MinikinInternal.cpp \
-    MinikinRefCounted.cpp \
     MinikinFont.cpp \
-    MinikinFontFreeType.cpp \
     SparseBitSet.cpp \
     WordBreaker.cpp
 
 minikin_c_includes := \
     external/harfbuzz_ng/src \
-    external/freetype/include \
     frameworks/minikin/include \
     $(intermediates)
 
@@ -109,7 +95,7 @@
 LOCAL_EXPORT_C_INCLUDE_DIRS := frameworks/minikin/include
 LOCAL_C_INCLUDES := $(minikin_c_includes)
 LOCAL_CPPFLAGS += -Werror -Wall -Wextra $(enable_race_detection)
-LOCAL_SHARED_LIBRARIES := liblog libicuuc-host
+LOCAL_SHARED_LIBRARIES := liblog libicuuc
 
 LOCAL_SRC_FILES := Hyphenator.cpp
 

diff --git a/libs/minikin/CmapCoverage.cpp b/libs/minikin/CmapCoverage.cpp
index c02526c..c56d07c 100644
--- a/libs/minikin/CmapCoverage.cpp
+++ b/libs/minikin/CmapCoverage.cpp

@@ -17,21 +17,33 @@
 // Determine coverage of font given its raw "cmap" OpenType table
 
 #define LOG_TAG "Minikin"
-#include <cutils/log.h>
 
+#include <algorithm>
 #include <vector>
 using std::vector;
 
+#include <log/log.h>
+
 #include <minikin/SparseBitSet.h>
 #include <minikin/CmapCoverage.h>
+#include "MinikinInternal.h"
 
-namespace android {
+#include <MinikinInternal.h>
+
+namespace minikin {
+
+constexpr uint32_t U32MAX = std::numeric_limits<uint32_t>::max();
 
 // These could perhaps be optimized to use __builtin_bswap16 and friends.
 static uint32_t readU16(const uint8_t* data, size_t offset) {
     return ((uint32_t)data[offset]) << 8 | ((uint32_t)data[offset + 1]);
 }
 
+static uint32_t readU24(const uint8_t* data, size_t offset) {
+    return ((uint32_t)data[offset]) << 16 | ((uint32_t)data[offset + 1]) << 8 |
+        ((uint32_t)data[offset + 2]);
+}
+
 static uint32_t readU32(const uint8_t* data, size_t offset) {
     return ((uint32_t)data[offset]) << 24 | ((uint32_t)data[offset + 1]) << 16 |
         ((uint32_t)data[offset + 2]) << 8 | ((uint32_t)data[offset + 3]);
@@ -59,6 +71,99 @@
     }
 }
 
+struct Range {
+    uint32_t start;  // inclusive
+    uint32_t end;  // exclusive
+
+    static Range InvalidRange() {
+        return Range({ U32MAX, U32MAX });
+    }
+
+    inline bool isValid() const {
+        return start != U32MAX && end != U32MAX;
+    }
+
+    // Returns true if left and right intersect.
+    inline static bool intersects(const Range& left, const Range& right) {
+        return left.isValid() && right.isValid() &&
+                left.start < right.end && right.start < left.end;
+    }
+
+    // Returns merged range. This method assumes left and right are not invalid ranges and they have
+    // an intersection.
+    static Range merge(const Range& left, const Range& right) {
+        return Range({ std::min(left.start, right.start), std::max(left.end, right.end) });
+    }
+};
+
+// Returns Range from given ranges vector. Returns InvalidRange if i is out of range.
+static inline Range getRange(const std::vector<uint32_t>& r, size_t i) {
+    return i + 1 < r.size() ? Range({ r[i], r[i + 1] }) : Range::InvalidRange();
+}
+
+// Merge two sorted lists of ranges into one sorted list.
+static std::vector<uint32_t> mergeRanges(
+        const std::vector<uint32_t>& lRanges, const std::vector<uint32_t>& rRanges) {
+    std::vector<uint32_t> out;
+
+    const size_t lsize = lRanges.size();
+    const size_t rsize = rRanges.size();
+    out.reserve(lsize + rsize);
+    size_t ri = 0;
+    size_t li = 0;
+    while (li < lsize || ri < rsize) {
+        Range left = getRange(lRanges, li);
+        Range right = getRange(rRanges, ri);
+
+        if (!right.isValid()) {
+            // No ranges left in rRanges. Just put all remaining ranges in lRanges.
+            do {
+                Range r = getRange(lRanges, li);
+                addRange(out, r.start, r.end);  // Input is sorted. Never returns false.
+                li += 2;
+            } while (li < lsize);
+            break;
+        } else if (!left.isValid()) {
+            // No ranges left in lRanges. Just put all remaining ranges in rRanges.
+            do {
+                Range r = getRange(rRanges, ri);
+                addRange(out, r.start, r.end);  // Input is sorted. Never returns false.
+                ri += 2;
+            } while (ri < rsize);
+            break;
+        } else if (!Range::intersects(left, right)) {
+            // No intersection. Add smaller range.
+            if (left.start < right.start) {
+                addRange(out, left.start, left.end);  // Input is sorted. Never returns false.
+                li += 2;
+            } else {
+                addRange(out, right.start, right.end);  // Input is sorted. Never returns false.
+                ri += 2;
+            }
+        } else {
+            Range merged = Range::merge(left, right);
+            li += 2;
+            ri += 2;
+            left = getRange(lRanges, li);
+            right = getRange(rRanges, ri);
+            while (Range::intersects(merged, left) || Range::intersects(merged, right)) {
+                if (Range::intersects(merged, left)) {
+                    merged = Range::merge(merged, left);
+                    li += 2;
+                    left = getRange(lRanges, li);
+                } else {
+                    merged = Range::merge(merged, right);
+                    ri += 2;
+                    right = getRange(rRanges, ri);
+                }
+            }
+            addRange(out, merged.start, merged.end);  // Input is sorted. Never returns false.
+        }
+    }
+
+    return out;
+}
+
 // Get the coverage information out of a Format 4 subtable, storing it in the coverage vector
 static bool getCoverageFormat4(vector<uint32_t>& coverage, const uint8_t* data, size_t size) {
     const size_t kSegCountOffset = 6;
@@ -142,6 +247,15 @@
             android_errorWriteLog(0x534e4554, "26413177");
             return false;
         }
+
+        // No need to read outside of Unicode code point range.
+        if (start > MAX_UNICODE_CODE_POINT) {
+            return true;
+        }
+        if (end > MAX_UNICODE_CODE_POINT) {
+            // file is inclusive, vector is exclusive
+            return addRange(coverage, start, MAX_UNICODE_CODE_POINT + 1);
+        }
         if (!addRange(coverage, start, end + 1)) {  // file is inclusive, vector is exclusive
             return false;
         }
@@ -149,76 +263,279 @@
     return true;
 }
 
-bool CmapCoverage::getCoverage(SparseBitSet& coverage, const uint8_t* cmap_data, size_t cmap_size,
-        bool* has_cmap_format14_subtable) {
-    vector<uint32_t> coverageVec;
-    const size_t kHeaderSize = 4;
-    const size_t kNumTablesOffset = 2;
-    const size_t kTableSize = 8;
-    const size_t kPlatformIdOffset = 0;
-    const size_t kEncodingIdOffset = 2;
-    const size_t kOffsetOffset = 4;
-    const uint16_t kUnicodePlatformId = 0;
-    const uint16_t kMicrosoftPlatformId = 3;
-    const uint16_t kUnicodeBmpEncodingId = 1;
-    const uint16_t kVariationSequencesEncodingId = 5;
-    const uint16_t kUnicodeUcs4EncodingId = 10;
-    const uint32_t kNoTable = UINT32_MAX;
-    if (kHeaderSize > cmap_size) {
-        return false;
+// Lower value has higher priority. 0 for the highest priority table.
+// kLowestPriority for unsupported tables.
+// This order comes from HarfBuzz's hb-ot-font.cc and needs to be kept in sync with it.
+constexpr uint8_t kLowestPriority = 255;
+uint8_t getTablePriority(uint16_t platformId, uint16_t encodingId) {
+    if (platformId == 3 && encodingId == 10) {
+        return 0;
     }
-    uint32_t numTables = readU16(cmap_data, kNumTablesOffset);
-    if (kHeaderSize + numTables * kTableSize > cmap_size) {
-        return false;
+    if (platformId == 0 && encodingId == 6) {
+        return 1;
     }
-    uint32_t bestTable = kNoTable;
-    bool hasCmapFormat14Subtable = false;
-    for (uint32_t i = 0; i < numTables; i++) {
-        uint16_t platformId = readU16(cmap_data, kHeaderSize + i * kTableSize + kPlatformIdOffset);
-        uint16_t encodingId = readU16(cmap_data, kHeaderSize + i * kTableSize + kEncodingIdOffset);
-        if (platformId == kMicrosoftPlatformId && encodingId == kUnicodeUcs4EncodingId) {
-            bestTable = i;
-            break;
-        } else if (platformId == kMicrosoftPlatformId && encodingId == kUnicodeBmpEncodingId) {
-            bestTable = i;
-        } else if (platformId == kUnicodePlatformId &&
-                encodingId == kVariationSequencesEncodingId) {
-            uint32_t offset = readU32(cmap_data, kHeaderSize + i * kTableSize + kOffsetOffset);
-            if (offset <= cmap_size - 2 && readU16(cmap_data, offset) == 14) {
-                hasCmapFormat14Subtable = true;
+    if (platformId == 0 && encodingId == 4) {
+        return 2;
+    }
+    if (platformId == 3 && encodingId == 1) {
+        return 3;
+    }
+    if (platformId == 0 && encodingId == 3) {
+        return 4;
+    }
+    if (platformId == 0 && encodingId == 2) {
+        return 5;
+    }
+    if (platformId == 0 && encodingId == 1) {
+        return 6;
+    }
+    if (platformId == 0 && encodingId == 0) {
+        return 7;
+    }
+    // Tables other than above are not supported.
+    return kLowestPriority;
+}
+
+// Get merged coverage information from default UVS Table and non-default UVS Table. Note that this
+// function assumes code points in both default UVS Table and non-default UVS table are stored in
+// ascending order. This is required by the standard.
+static bool getVSCoverage(std::vector<uint32_t>* out_ranges, const uint8_t* data, size_t size,
+        uint32_t defaultUVSTableOffset, uint32_t nonDefaultUVSTableOffset,
+        const SparseBitSet& baseCoverage) {
+    // Need to merge supported ranges from default UVS Table and non-default UVS Table.
+    // First, collect all supported code points from non default UVS table.
+    std::vector<uint32_t> rangesFromNonDefaultUVSTable;
+    if (nonDefaultUVSTableOffset != 0) {
+        constexpr size_t kHeaderSize = 4;
+        constexpr size_t kUVSMappingRecordSize = 5;
+
+        const uint8_t* nonDefaultUVSTable = data + nonDefaultUVSTableOffset;
+        // This subtraction doesn't underflow since the caller already checked
+        // size >= nonDefaultUVSTableOffset.
+        const size_t nonDefaultUVSTableRemaining = size - nonDefaultUVSTableOffset;
+        if (nonDefaultUVSTableRemaining < kHeaderSize) {
+            return false;
+        }
+        const uint32_t numRecords = readU32(nonDefaultUVSTable, 0);
+        if (numRecords * kUVSMappingRecordSize + kHeaderSize > nonDefaultUVSTableRemaining) {
+            return false;
+        }
+        for (uint32_t i = 0; i < numRecords; ++i) {
+            const size_t recordOffset = kHeaderSize + kUVSMappingRecordSize * i;
+            const uint32_t codePoint = readU24(nonDefaultUVSTable, recordOffset);
+            if (!addRange(rangesFromNonDefaultUVSTable, codePoint, codePoint + 1)) {
+                return false;
             }
         }
     }
-    *has_cmap_format14_subtable = hasCmapFormat14Subtable;
-#ifdef VERBOSE_DEBUG
-    ALOGD("best table = %d\n", bestTable);
-#endif
-    if (bestTable == kNoTable) {
-        return false;
+
+    // Then, construct range from default UVS Table with merging code points from non default UVS
+    // table.
+    std::vector<uint32_t> rangesFromDefaultUVSTable;
+    if (defaultUVSTableOffset != 0) {
+        constexpr size_t kHeaderSize = 4;
+        constexpr size_t kUnicodeRangeRecordSize = 4;
+
+        const uint8_t* defaultUVSTable = data + defaultUVSTableOffset;
+        // This subtraction doesn't underflow since the caller already checked
+        // size >= defaultUVSTableOffset.
+        const size_t defaultUVSTableRemaining = size - defaultUVSTableOffset;
+
+        if (defaultUVSTableRemaining < kHeaderSize) {
+            return false;
+        }
+        const uint32_t numRecords = readU32(defaultUVSTable, 0);
+        if (numRecords * kUnicodeRangeRecordSize + kHeaderSize > defaultUVSTableRemaining) {
+            return false;
+        }
+
+        for (uint32_t i = 0; i < numRecords; ++i) {
+            const size_t recordOffset = kHeaderSize + kUnicodeRangeRecordSize * i;
+            const uint32_t startCp = readU24(defaultUVSTable, recordOffset);
+            const uint8_t rangeLength = defaultUVSTable[recordOffset + 3];
+
+            // Then insert range from default UVS Table, but exclude if the base codepoint is not
+            // supported.
+            for (uint32_t cp = startCp; cp <= startCp + rangeLength; ++cp) {
+                // All codepoints in default UVS table should go to the glyphs of the codepoints
+                // without variation selectors. We need to check the default glyph availability and
+                // exclude the codepoint if it is not supported by default cmap table.
+                if (baseCoverage.get(cp)) {
+                    if (!addRange(rangesFromDefaultUVSTable, cp, cp + 1 /* exclusive */)) {
+                        return false;
+                    }
+                }
+            }
+        }
     }
-    uint32_t offset = readU32(cmap_data, kHeaderSize + bestTable * kTableSize + kOffsetOffset);
-    if (offset > cmap_size - 2) {
-        return false;
-    }
-    uint16_t format = readU16(cmap_data, offset);
-    bool success = false;
-    const uint8_t* tableData = cmap_data + offset;
-    const size_t tableSize = cmap_size - offset;
-    if (format == 4) {
-        success = getCoverageFormat4(coverageVec, tableData, tableSize);
-    } else if (format == 12) {
-        success = getCoverageFormat12(coverageVec, tableData, tableSize);
-    }
-    if (success) {
-        coverage.initFromRanges(&coverageVec.front(), coverageVec.size() >> 1);
-    }
-#ifdef VERBOSE_DEBUG
-    for (size_t i = 0; i < coverageVec.size(); i += 2) {
-        ALOGD("%x:%x\n", coverageVec[i], coverageVec[i + 1]);
-    }
-    ALOGD("success = %d", success);
-#endif
-    return success;
+    *out_ranges = mergeRanges(rangesFromDefaultUVSTable, rangesFromNonDefaultUVSTable);
+    return true;
 }
 
-}  // namespace android
+// Builds per-variation-selector coverage from a cmap format 14 subtable.
+//
+// |out| is cleared first. For every variation selector record that can be parsed, the slot
+// (*out)[getVsIndex(selector)] is set to a SparseBitSet built from the ranges produced by
+// getVSCoverage(); slots for selectors without a usable record stay null. If the subtable header
+// is truncated or inconsistent, |out| is left empty.
+//
+// |data| and |size| delimit the bytes available for the subtable; |baseCoverage| is forwarded to
+// getVSCoverage().
+static void getCoverageFormat14(std::vector<std::unique_ptr<SparseBitSet>>* out,
+        const uint8_t* data, size_t size, const SparseBitSet& baseCoverage) {
+    constexpr size_t kHeaderSize = 10;
+    constexpr size_t kRecordSize = 11;
+    constexpr size_t kLengthOffset = 2;
+    constexpr size_t kNumRecordOffset = 6;
+
+    out->clear();
+    if (size < kHeaderSize) {
+        return;
+    }
+
+    // The declared length must fit in the available bytes and must at least cover the header.
+    const uint32_t length = readU32(data, kLengthOffset);
+    if (size < length || length < kHeaderSize) {
+        return;
+    }
+
+    // Bound the record count by division rather than multiplication: kRecordSize * numRecords
+    // can wrap around when size_t is 32 bits wide, which would let an oversized record count pass
+    // the check and cause reads outside of the subtable.
+    const uint32_t numRecords = readU32(data, kNumRecordOffset);
+    if (numRecords == 0 || numRecords > (length - kHeaderSize) / kRecordSize) {
+        return;
+    }
+
+    for (uint32_t i = 0; i < numRecords; ++i) {
+        // Insert from the largest code points since it determines the size of the output vector.
+        const uint32_t recordHeadOffset = kHeaderSize + kRecordSize * (numRecords - i - 1);
+        const uint32_t vsCodePoint = readU24(data, recordHeadOffset);
+        const uint32_t defaultUVSOffset = readU32(data, recordHeadOffset + 3);
+        const uint32_t nonDefaultUVSOffset = readU32(data, recordHeadOffset + 7);
+        if (defaultUVSOffset > length || nonDefaultUVSOffset > length) {
+            continue;  // Record points outside of the subtable: skip it.
+        }
+
+        const uint16_t vsIndex = getVsIndex(vsCodePoint);
+        if (vsIndex == INVALID_VS_INDEX) {
+            continue;  // Not a variation selector we keep coverage for.
+        }
+        std::vector<uint32_t> ranges;
+        if (!getVSCoverage(&ranges, data, length, defaultUVSOffset, nonDefaultUVSOffset,
+                baseCoverage)) {
+            continue;
+        }
+        if (out->size() < vsIndex + 1) {
+            out->resize(vsIndex + 1);
+        }
+        // |ranges| holds [start, end) pairs, hence half as many ranges as elements.
+        (*out)[vsIndex].reset(new SparseBitSet(ranges.data(), ranges.size() >> 1));
+    }
+
+    out->shrink_to_fit();
+}
+
+// Parses the cmap table in |cmap_data| (|cmap_size| bytes) and returns the code point coverage of
+// the best usable format 4 or format 12 subtable. An empty SparseBitSet is returned when the
+// header is truncated, no such subtable is found, or the chosen subtable fails to parse.
+//
+// If a (platform 0, encoding 5) subtable in format 14 is present, |out| is filled with the
+// per-variation-selector coverage read from it; otherwise |out| is not touched.
+SparseBitSet CmapCoverage::getCoverage(const uint8_t* cmap_data, size_t cmap_size,
+        std::vector<std::unique_ptr<SparseBitSet>>* out) {
+    constexpr size_t kHeaderSize = 4;
+    constexpr size_t kNumTablesOffset = 2;
+    constexpr size_t kTableSize = 8;
+    constexpr size_t kPlatformIdOffset = 0;
+    constexpr size_t kEncodingIdOffset = 2;
+    constexpr size_t kOffsetOffset = 4;
+    constexpr size_t kFormatOffset = 0;
+    constexpr uint32_t kNoTable = UINT32_MAX;
+
+    if (kHeaderSize > cmap_size) {
+        return SparseBitSet();
+    }
+    uint32_t numTables = readU16(cmap_data, kNumTablesOffset);
+    if (kHeaderSize + numTables * kTableSize > cmap_size) {
+        return SparseBitSet();
+    }
+
+    // Scan the encoding records, remembering the best format 4/12 subtable (smaller priority
+    // value wins) and the first format 14 variation sequences subtable.
+    uint32_t bestTableOffset = kNoTable;
+    uint16_t bestTableFormat = 0;
+    uint8_t bestTablePriority = kLowestPriority;
+    uint32_t vsTableOffset = kNoTable;
+    for (uint32_t i = 0; i < numTables; ++i) {
+        const uint32_t tableHeadOffset = kHeaderSize + i * kTableSize;
+        const uint16_t platformId = readU16(cmap_data, tableHeadOffset + kPlatformIdOffset);
+        const uint16_t encodingId = readU16(cmap_data, tableHeadOffset + kEncodingIdOffset);
+        const uint32_t offset = readU32(cmap_data, tableHeadOffset + kOffsetOffset);
+
+        if (offset > cmap_size - 2) {
+            continue;  // Invalid table: not enough space to read.
+        }
+        const uint16_t format = readU16(cmap_data, offset + kFormatOffset);
+
+        if (platformId == 0 /* Unicode */ && encodingId == 5 /* Variation Sequences */) {
+            if (vsTableOffset == kNoTable && format == 14) {
+                vsTableOffset = offset;
+            } else {
+                // Ignore the (0, 5) table if we have already seen another valid one or it's in a
+                // format we don't understand.
+            }
+        } else {
+            uint32_t length;
+            uint32_t language;
+
+            if (format == 4) {
+                constexpr size_t lengthOffset = 2;
+                constexpr size_t languageOffset = 4;
+                constexpr size_t minTableSize = languageOffset + 2;
+                if (offset > cmap_size - minTableSize) {
+                    continue;  // Invalid table: not enough space to read.
+                }
+                length = readU16(cmap_data, offset + lengthOffset);
+                language = readU16(cmap_data, offset + languageOffset);
+            } else if (format == 12) {
+                constexpr size_t lengthOffset = 4;
+                constexpr size_t languageOffset = 8;
+                constexpr size_t minTableSize = languageOffset + 4;
+                if (offset > cmap_size - minTableSize) {
+                    continue;  // Invalid table: not enough space to read.
+                }
+                length = readU32(cmap_data, offset + lengthOffset);
+                language = readU32(cmap_data, offset + languageOffset);
+            } else {
+                continue;
+            }
+
+            if (length > cmap_size - offset) {
+                continue;  // Invalid table: table length is larger than whole cmap data size.
+            }
+            if (language != 0) {
+                // Unsupported or invalid table: this is either a subtable for the Macintosh
+                // platform (which we don't support), or an invalid subtable since language field
+                // should be zero for non-Macintosh subtables.
+                continue;
+            }
+            const uint8_t priority = getTablePriority(platformId, encodingId);
+            if (priority < bestTablePriority) {
+                bestTableOffset = offset;
+                bestTablePriority = priority;
+                bestTableFormat = format;
+            }
+        }
+        if (vsTableOffset != kNoTable && bestTablePriority == 0 /* highest priority */) {
+            // Already found the highest priority table and variation sequences table. No need to
+            // look at remaining tables.
+            break;
+        }
+    }
+
+    SparseBitSet coverage;
+
+    if (bestTableOffset != kNoTable) {
+        const uint8_t* tableData = cmap_data + bestTableOffset;
+        const size_t tableSize = cmap_size - bestTableOffset;
+        bool success;
+        vector<uint32_t> coverageVec;
+        // Only formats 4 and 12 can be recorded as the best table by the loop above.
+        if (bestTableFormat == 4) {
+            success = getCoverageFormat4(coverageVec, tableData, tableSize);
+        } else {
+            success = getCoverageFormat12(coverageVec, tableData, tableSize);
+        }
+
+        if (success) {
+            // Use data() rather than &front(): front() is undefined on an empty vector, which
+            // would be hit if a subtable parses successfully but yields no ranges.
+            coverage = SparseBitSet(coverageVec.data(), coverageVec.size() >> 1);
+        }
+    }
+
+    if (vsTableOffset != kNoTable) {
+        const uint8_t* tableData = cmap_data + vsTableOffset;
+        const size_t tableSize = cmap_size - vsTableOffset;
+        getCoverageFormat14(out, tableData, tableSize, coverage);
+    }
+    return coverage;
+}
+
+}  // namespace minikin

diff --git a/libs/minikin/Emoji.cpp b/libs/minikin/Emoji.cpp
new file mode 100644
index 0000000..fbe68ca
--- /dev/null
+++ b/libs/minikin/Emoji.cpp

@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <minikin/Emoji.h>
+
+namespace minikin {
+
+bool isNewEmoji(uint32_t c) {
+    // Inclusive code point ranges of the emoji characters new in Unicode emoji 5.0, in ascending
+    // order. From http://www.unicode.org/Public/emoji/5.0/emoji-data.txt
+    // TODO: Remove once emoji-data.txt 5.0 is in ICU or update to 6.0.
+    static constexpr uint32_t kNewEmojiRanges[][2] = {
+        {0x1F6F7, 0x1F6F8},
+        {0x1F91F, 0x1F91F},
+        {0x1F928, 0x1F92F},
+        {0x1F931, 0x1F932},
+        {0x1F94C, 0x1F94C},
+        {0x1F95F, 0x1F96B},
+        {0x1F992, 0x1F997},
+        {0x1F9D0, 0x1F9E6},
+    };
+    for (const auto& range : kNewEmojiRanges) {
+        if (c < range[0]) {
+            // The table is sorted, so no later range can contain c either.
+            return false;
+        }
+        if (c <= range[1]) {
+            return true;
+        }
+    }
+    return false;
+}
+
+bool isEmoji(uint32_t c) {
+    // Check the local table of new emoji first, then fall back to ICU's Emoji property.
+    if (isNewEmoji(c)) {
+        return true;
+    }
+    return u_hasBinaryProperty(c, UCHAR_EMOJI);
+}
+
+bool isEmojiModifier(uint32_t c) {
+    // Emoji modifiers are not expected to change, so there is only a small chance that we will
+    // need to customize this.
+    return u_hasBinaryProperty(c, UCHAR_EMOJI_MODIFIER);
+}
+
+bool isEmojiBase(uint32_t c) {
+    // U+1F91D and U+1F93C were removed from Emoji_Modifier_Base in Emoji 4.0, but fonts and
+    // user-generated text still treat them as potential emoji bases, so keep accepting them.
+    const bool isLegacyBase = (c == 0x1F91D) || (c == 0x1F93C);
+
+    // Emoji Modifier Base characters new in Unicode emoji 5.0.
+    // From http://www.unicode.org/Public/emoji/5.0/emoji-data.txt
+    // TODO: Remove once emoji-data.txt 5.0 is in ICU or update to 6.0.
+    const bool isEmoji5Base = (c == 0x1F91F)
+            || (c >= 0x1F931 && c <= 0x1F932)
+            || (c >= 0x1F9D1 && c <= 0x1F9DD);
+
+    if (isLegacyBase || isEmoji5Base) {
+        return true;
+    }
+    // Everything else is decided by ICU's Emoji_Modifier_Base property.
+    return u_hasBinaryProperty(c, UCHAR_EMOJI_MODIFIER_BASE);
+}
+
+UCharDirection emojiBidiOverride(const void* /* context */, UChar32 c) {
+    // All new emoji characters in Unicode 10.0 are of the bidi class ON; every other character
+    // keeps the direction reported by ICU.
+    return isNewEmoji(c) ? U_OTHER_NEUTRAL : u_charDirection(c);
+}
+
+}  // namespace minikin
+

diff --git a/libs/minikin/FontCollection.cpp b/libs/minikin/FontCollection.cpp
index 33418ab..871d974 100644
--- a/libs/minikin/FontCollection.cpp
+++ b/libs/minikin/FontCollection.cpp

@@ -17,20 +17,22 @@
 // #define VERBOSE_DEBUG
 
 #define LOG_TAG "Minikin"
-#include <cutils/log.h>
+
 #include <algorithm>
 
+#include <log/log.h>
 #include "unicode/unistr.h"
 #include "unicode/unorm2.h"
 
 #include "FontLanguage.h"
 #include "FontLanguageListCache.h"
 #include "MinikinInternal.h"
+#include <minikin/Emoji.h>
 #include <minikin/FontCollection.h>
 
 using std::vector;
 
-namespace android {
+namespace minikin {
 
 template <typename T>
 static inline T max(T a, T b) {
@@ -40,47 +42,21 @@
 const uint32_t EMOJI_STYLE_VS = 0xFE0F;
 const uint32_t TEXT_STYLE_VS = 0xFE0E;
 
-// See http://www.unicode.org/Public/9.0.0/ucd/StandardizedVariants.txt
-// U+2640, U+2642, U+2695 are now in emoji category but not listed in above file, so added them by
-// manual.
-// Must be sorted.
-const uint32_t EMOJI_STYLE_VS_BASES[] = {
-    0x0023, 0x002A, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039,
-    0x00A9, 0x00AE, 0x203C, 0x2049, 0x2122, 0x2139, 0x2194, 0x2195, 0x2196, 0x2197, 0x2198, 0x2199,
-    0x21A9, 0x21AA, 0x231A, 0x231B, 0x2328, 0x23CF, 0x23ED, 0x23EE, 0x23EF, 0x23F1, 0x23F2, 0x23F8,
-    0x23F9, 0x23FA, 0x24C2, 0x25AA, 0x25AB, 0x25B6, 0x25C0, 0x25FB, 0x25FC, 0x25FD, 0x25FE, 0x2600,
-    0x2601, 0x2602, 0x2603, 0x2604, 0x260E, 0x2611, 0x2614, 0x2615, 0x2618, 0x261D, 0x2620, 0x2622,
-    0x2623, 0x2626, 0x262A, 0x262E, 0x262F, 0x2638, 0x2639, 0x263A, 0x2640, 0x2642, 0x2648, 0x2649,
-    0x264A, 0x264B, 0x264C, 0x264D, 0x264E, 0x264F, 0x2650, 0x2651, 0x2652, 0x2653, 0x2660, 0x2663,
-    0x2665, 0x2666, 0x2668, 0x267B, 0x267F, 0x2692, 0x2693, 0x2694, 0x2695, 0x2696, 0x2697, 0x2699,
-    0x269B, 0x269C, 0x26A0, 0x26A1, 0x26AA, 0x26AB, 0x26B0, 0x26B1, 0x26BD, 0x26BE, 0x26C4, 0x26C5,
-    0x26C8, 0x26CF, 0x26D1, 0x26D3, 0x26D4, 0x26E9, 0x26EA, 0x26F0, 0x26F1, 0x26F2, 0x26F3, 0x26F4,
-    0x26F5, 0x26F7, 0x26F8, 0x26F9, 0x26FA, 0x26FD, 0x2702, 0x2708, 0x2709, 0x270C, 0x270D, 0x270F,
-    0x2712, 0x2714, 0x2716, 0x271D, 0x2721, 0x2733, 0x2734, 0x2744, 0x2747, 0x2757, 0x2763, 0x2764,
-    0x27A1, 0x2934, 0x2935, 0x2B05, 0x2B06, 0x2B07, 0x2B1B, 0x2B1C, 0x2B50, 0x2B55, 0x3030, 0x303D,
-    0x3297, 0x3299, 0x1F004, 0x1F170, 0x1F171, 0x1F17E, 0x1F17F, 0x1F202, 0x1F21A, 0x1F22F, 0x1F237,
-    0x1F321, 0x1F324, 0x1F325, 0x1F326, 0x1F327, 0x1F328, 0x1F329, 0x1F32A, 0x1F32B, 0x1F32C,
-    0x1F336, 0x1F37D, 0x1F396, 0x1F397, 0x1F399, 0x1F39A, 0x1F39B, 0x1F39E, 0x1F39F, 0x1F3CB,
-    0x1F3CC, 0x1F3CD, 0x1F3CE, 0x1F3D4, 0x1F3D5, 0x1F3D6, 0x1F3D7, 0x1F3D8, 0x1F3D9, 0x1F3DA,
-    0x1F3DB, 0x1F3DC, 0x1F3DD, 0x1F3DE, 0x1F3DF, 0x1F3F3, 0x1F3F5, 0x1F3F7, 0x1F43F, 0x1F441,
-    0x1F4FD, 0x1F549, 0x1F54A, 0x1F56F, 0x1F570, 0x1F573, 0x1F574, 0x1F575, 0x1F576, 0x1F577,
-    0x1F578, 0x1F579, 0x1F587, 0x1F58A, 0x1F58B, 0x1F58C, 0x1F58D, 0x1F590, 0x1F5A5, 0x1F5A8,
-    0x1F5B1, 0x1F5B2, 0x1F5BC, 0x1F5C2, 0x1F5C3, 0x1F5C4, 0x1F5D1, 0x1F5D2, 0x1F5D3, 0x1F5DC,
-    0x1F5DD, 0x1F5DE, 0x1F5E1, 0x1F5E3, 0x1F5E8, 0x1F5EF, 0x1F5F3, 0x1F5FA, 0x1F6CB, 0x1F6CD,
-    0x1F6CE, 0x1F6CF, 0x1F6E0, 0x1F6E1, 0x1F6E2, 0x1F6E3, 0x1F6E4, 0x1F6E5, 0x1F6E9, 0x1F6F0,
-    0x1F6F3,
-};
-
-static bool isEmojiStyleVSBase(uint32_t cp) {
-    const size_t length = sizeof(EMOJI_STYLE_VS_BASES) / sizeof(EMOJI_STYLE_VS_BASES[0]);
-    return std::binary_search(EMOJI_STYLE_VS_BASES, EMOJI_STYLE_VS_BASES + length, cp);
-}
-
 uint32_t FontCollection::sNextId = 0;
 
-FontCollection::FontCollection(const vector<FontFamily*>& typefaces) :
+// Initializes the collection from the single font family |typeface|.
+FontCollection::FontCollection(std::shared_ptr<FontFamily>&& typeface) : mMaxChar(0) {
+    // A copy of the pointer goes into the list; |typeface| itself is not moved from.
+    const std::vector<std::shared_ptr<FontFamily>> singleFamily(1, typeface);
+    init(singleFamily);
+}
+
+FontCollection::FontCollection(const vector<std::shared_ptr<FontFamily>>& typefaces) :
     mMaxChar(0) {
-    AutoMutex _l(gMinikinLock);
+    init(typefaces);
+}
+
+void FontCollection::init(const vector<std::shared_ptr<FontFamily>>& typefaces) {
+    android::AutoMutex _l(gMinikinLock);
     mId = sNextId++;
     vector<uint32_t> lastChar;
     size_t nTypefaces = typefaces.size();
@@ -89,29 +65,27 @@
 #endif
     const FontStyle defaultStyle;
     for (size_t i = 0; i < nTypefaces; i++) {
-        FontFamily* family = typefaces[i];
-        MinikinFont* typeface = family->getClosestMatch(defaultStyle).font;
-        if (typeface == NULL) {
+        const std::shared_ptr<FontFamily>& family = typefaces[i];
+        if (family->getClosestMatch(defaultStyle).font == nullptr) {
             continue;
         }
-        family->RefLocked();
-        const SparseBitSet* coverage = family->getCoverage();
-        if (coverage == nullptr) {
-            family->UnrefLocked();
-            continue;
-        }
+        const SparseBitSet& coverage = family->getCoverage();
         mFamilies.push_back(family);  // emplace_back would be better
         if (family->hasVSTable()) {
             mVSFamilyVec.push_back(family);
         }
-        mMaxChar = max(mMaxChar, coverage->length());
-        lastChar.push_back(coverage->nextSetBit(0));
+        mMaxChar = max(mMaxChar, coverage.length());
+        lastChar.push_back(coverage.nextSetBit(0));
+
+        const std::unordered_set<AxisTag>& supportedAxes = family->supportedAxes();
+        mSupportedAxes.insert(supportedAxes.begin(), supportedAxes.end());
     }
     nTypefaces = mFamilies.size();
     LOG_ALWAYS_FATAL_IF(nTypefaces == 0,
         "Font collection must have at least one valid typeface");
+    LOG_ALWAYS_FATAL_IF(nTypefaces > 254,
+        "Font collection may only have up to 254 font families.");
     size_t nPages = (mMaxChar + kPageMask) >> kLogCharsPerPage;
-    size_t offset = 0;
     // TODO: Use variation selector map for mRanges construction.
     // A font can have a glyph for a base code point and variation selector pair but no glyph for
     // the base code point without variation selector. The family won't be listed in the range in
@@ -123,27 +97,23 @@
 #ifdef VERBOSE_DEBUG
         ALOGD("i=%zd: range start = %zd\n", i, offset);
 #endif
-        range->start = offset;
+        range->start = mFamilyVec.size();
         for (size_t j = 0; j < nTypefaces; j++) {
             if (lastChar[j] < (i + 1) << kLogCharsPerPage) {
-                FontFamily* family = mFamilies[j];
-                mFamilyVec.push_back(family);
-                offset++;
-                uint32_t nextChar = family->getCoverage()->nextSetBit((i + 1) << kLogCharsPerPage);
+                const std::shared_ptr<FontFamily>& family = mFamilies[j];
+                mFamilyVec.push_back(static_cast<uint8_t>(j));
+                uint32_t nextChar = family->getCoverage().nextSetBit((i + 1) << kLogCharsPerPage);
 #ifdef VERBOSE_DEBUG
                 ALOGD("nextChar = %d (j = %zd)\n", nextChar, j);
 #endif
                 lastChar[j] = nextChar;
             }
         }
-        range->end = offset;
+        range->end = mFamilyVec.size();
     }
-}
-
-FontCollection::~FontCollection() {
-    for (size_t i = 0; i < mFamilies.size(); i++) {
-        mFamilies[i]->UnrefLocked();
-    }
+    // See the comment in Range for more details.
+    LOG_ALWAYS_FATAL_IF(mFamilyVec.size() >= 0xFFFF,
+        "Exceeded the maximum indexable cmap coverage.");
 }
 
 // Special scores for the font fallback.
@@ -167,7 +137,7 @@
 //  - kFirstFontScore: When the font is the first font family in the collection and it supports the
 //    given character or variation sequence.
 uint32_t FontCollection::calcFamilyScore(uint32_t ch, uint32_t vs, int variant, uint32_t langListId,
-                                        FontFamily* fontFamily) const {
+        const std::shared_ptr<FontFamily>& fontFamily) const {
 
     const uint32_t coverageScore = calcCoverageScore(ch, vs, fontFamily);
     if (coverageScore == kFirstFontScore || coverageScore == kUnsupportedFontScore) {
@@ -194,9 +164,10 @@
 // - Returns 2 if the vs is a text variation selector (U+FE0E) and if the font is not an emoji font.
 // - Returns 1 if the variation selector is not specified or if the font family only supports the
 //   variation sequence's base character.
-uint32_t FontCollection::calcCoverageScore(uint32_t ch, uint32_t vs, FontFamily* fontFamily) const {
+uint32_t FontCollection::calcCoverageScore(uint32_t ch, uint32_t vs,
+        const std::shared_ptr<FontFamily>& fontFamily) const {
     const bool hasVSGlyph = (vs != 0) && fontFamily->hasGlyph(ch, vs);
-    if (!hasVSGlyph && !fontFamily->getCoverage()->get(ch)) {
+    if (!hasVSGlyph && !fontFamily->getCoverage().get(ch)) {
         // The font doesn't support either variation sequence or even the base character.
         return kUnsupportedFontScore;
     }
@@ -219,7 +190,7 @@
         const FontLanguages& langs = FontLanguageListCache::getById(fontFamily->langId());
         bool hasEmojiFlag = false;
         for (size_t i = 0; i < langs.size(); ++i) {
-            if (langs[i].hasEmojiFlag()) {
+            if (langs[i].getEmojiStyle() == FontLanguage::EMSTYLE_EMOJI) {
                 hasEmojiFlag = true;
                 break;
             }
@@ -234,22 +205,24 @@
     return 1;
 }
 
-// Calculates font scores based on the script matching and primary langauge matching.
+// Calculate font scores based on the script matching, subtag matching and primary language matching.
 //
-// If the font's script doesn't support the requested script, the font gets a score of 0. If the
-// font's script supports the requested script and the font has the same primary language as the
-// requested one, the font gets a score of 2. If the font's script supports the requested script
-// but the primary language is different from the requested one, the font gets a score of 1.
+// 1. If only the font's language matches, or nothing matches between the requested language
+//    and the font's supported languages, the font gets a score of 0.
+// 2. Without a language match, a subtag match scores 2 and a script match scores 1, because a
+//    subtag may change the font's EmojiStyle and therefore takes precedence over the script.
+// 3. When two elements match, language-and-subtag scores 4 and language-and-script scores 3,
+//    for the same reason as above.
 //
 // If two languages in the requested list have the same language score, the font matching with
 // higher priority language gets a higher score. For example, in the case the user requested
 // language list is "ja-Jpan,en-Latn". The score of for the font of "ja-Jpan" gets a higher score
 // than the font of "en-Latn".
 //
-// To achieve the above two conditions, the language score is determined as follows:
-//   LanguageScore = s(0) * 3^(m - 1) + s(1) * 3^(m - 2) + ... + s(m - 2) * 3 + s(m - 1)
+// To achieve score calculation with priorities, the language score is determined as follows:
+//   LanguageScore = s(0) * 5^(m - 1) + s(1) * 5^(m - 2) + ... + s(m - 2) * 5 + s(m - 1)
 // Here, m is the maximum number of languages to be compared, and s(i) is the i-th language's
-// matching score. The possible values of s(i) are 0, 1 and 2.
+// matching score. The possible values of s(i) are 0, 1, 2, 3 and 4.
 uint32_t FontCollection::calcLanguageMatchingScore(
         uint32_t userLangListId, const FontFamily& fontFamily) {
     const FontLanguages& langList = FontLanguageListCache::getById(userLangListId);
@@ -258,7 +231,7 @@
     const size_t maxCompareNum = std::min(langList.size(), FONT_LANGUAGES_LIMIT);
     uint32_t score = 0;
     for (size_t i = 0; i < maxCompareNum; ++i) {
-        score = score * 3u + langList[i].calcScoreFor(fontLanguages);
+        score = score * 5u + langList[i].calcScoreFor(fontLanguages);
     }
     return score;
 }
@@ -275,37 +248,26 @@
 // 2. Calculate a score for the font family. See comments in calcFamilyScore for the detail.
 // 3. Highest score wins, with ties resolved to the first font.
 // This method never returns nullptr.
-FontFamily* FontCollection::getFamilyForChar(uint32_t ch, uint32_t vs,
+const std::shared_ptr<FontFamily>& FontCollection::getFamilyForChar(uint32_t ch, uint32_t vs,
             uint32_t langListId, int variant) const {
     if (ch >= mMaxChar) {
         return mFamilies[0];
     }
 
-    const std::vector<FontFamily*>* familyVec = &mFamilyVec;
     Range range = mRanges[ch >> kLogCharsPerPage];
 
-    std::vector<FontFamily*> familyVecForVS;
     if (vs != 0) {
-        // If variation selector is specified, need to search for both the variation sequence and
-        // its base codepoint. Compute the union vector of them.
-        familyVecForVS = mVSFamilyVec;
-        familyVecForVS.insert(familyVecForVS.end(),
-                mFamilyVec.begin() + range.start, mFamilyVec.begin() + range.end);
-        std::sort(familyVecForVS.begin(), familyVecForVS.end());
-        auto last = std::unique(familyVecForVS.begin(), familyVecForVS.end());
-        familyVecForVS.erase(last, familyVecForVS.end());
-
-        familyVec = &familyVecForVS;
-        range = { 0, familyVecForVS.size() };
+        range = { 0, static_cast<uint16_t>(mFamilies.size()) };
     }
 
 #ifdef VERBOSE_DEBUG
     ALOGD("querying range %zd:%zd\n", range.start, range.end);
 #endif
-    FontFamily* bestFamily = nullptr;
+    int bestFamilyIndex = -1;
     uint32_t bestScore = kUnsupportedFontScore;
     for (size_t i = range.start; i < range.end; i++) {
-        FontFamily* family = (*familyVec)[i];
+        const std::shared_ptr<FontFamily>& family =
+                vs == 0 ? mFamilies[mFamilyVec[i]] : mFamilies[i];
         const uint32_t score = calcFamilyScore(ch, vs, variant, langListId, family);
         if (score == kFirstFontScore) {
             // If the first font family supports the given character or variation sequence, always
@@ -314,10 +276,10 @@
         }
         if (score > bestScore) {
             bestScore = score;
-            bestFamily = family;
+            bestFamilyIndex = i;
         }
     }
-    if (bestFamily == nullptr) {
+    if (bestFamilyIndex == -1) {
         UErrorCode errorCode = U_ZERO_ERROR;
         const UNormalizer2* normalizer = unorm2_getNFDInstance(&errorCode);
         if (U_SUCCESS(errorCode)) {
@@ -329,24 +291,27 @@
                 return getFamilyForChar(ch, vs, langListId, variant);
             }
         }
-        bestFamily = mFamilies[0];
+        return mFamilies[0];
     }
-    return bestFamily;
+    return vs == 0 ? mFamilies[mFamilyVec[bestFamilyIndex]] : mFamilies[bestFamilyIndex];
 }
 
-const uint32_t NBSP = 0xa0;
-const uint32_t ZWJ = 0x200c;
-const uint32_t ZWNJ = 0x200d;
+const uint32_t NBSP = 0x00A0;
+const uint32_t SOFT_HYPHEN = 0x00AD;
+const uint32_t ZWJ = 0x200D;   // U+200D ZERO WIDTH JOINER
+const uint32_t ZWNJ = 0x200C;  // U+200C ZERO WIDTH NON-JOINER
 const uint32_t HYPHEN = 0x2010;
 const uint32_t NB_HYPHEN = 0x2011;
+const uint32_t NNBSP = 0x202F;
 const uint32_t FEMALE_SIGN = 0x2640;
 const uint32_t MALE_SIGN = 0x2642;
 const uint32_t STAFF_OF_AESCULAPIUS = 0x2695;
 
 // Characters where we want to continue using existing font run instead of
 // recomputing the best match in the fallback list.
-static const uint32_t stickyWhitelist[] = { '!', ',', '-', '.', ':', ';', '?', NBSP, ZWJ, ZWNJ,
-        HYPHEN, NB_HYPHEN, FEMALE_SIGN, MALE_SIGN, STAFF_OF_AESCULAPIUS };
+static const uint32_t stickyWhitelist[] = {
+        '!', ',', '-', '.', ':', ';', '?', NBSP, ZWJ, ZWNJ,
+        HYPHEN, NB_HYPHEN, NNBSP, FEMALE_SIGN, MALE_SIGN, STAFF_OF_AESCULAPIUS };
 
 static bool isStickyWhitelisted(uint32_t c) {
     for (size_t i = 0; i < sizeof(stickyWhitelist) / sizeof(stickyWhitelist[0]); i++) {
@@ -355,10 +320,6 @@
     return false;
 }
 
-static bool isVariationSelector(uint32_t c) {
-    return (0xFE00 <= c && c <= 0xFE0F) || (0xE0100 <= c && c <= 0xE01EF);
-}
-
 bool FontCollection::hasVariationSelector(uint32_t baseCodepoint,
         uint32_t variationSelector) const {
     if (!isVariationSelector(variationSelector)) {
@@ -368,8 +329,6 @@
         return false;
     }
 
-    AutoMutex _l(gMinikinLock);
-
     // Currently mRanges can not be used here since it isn't aware of the variation sequence.
     for (size_t i = 0; i < mVSFamilyVec.size(); i++) {
         if (mVSFamilyVec[i]->hasGlyph(baseCodepoint, variationSelector)) {
@@ -377,12 +336,17 @@
         }
     }
 
+    // TODO: We can remove this lock by precomputing color emoji information.
+    android::AutoMutex _l(gMinikinLock);
+
     // Even if there is no cmap format 14 subtable entry for the given sequence, should return true
-    // for emoji + U+FE0E case since we have special fallback rule for the sequence.
-    if (isEmojiStyleVSBase(baseCodepoint) && variationSelector == TEXT_STYLE_VS) {
+    // for <char, text presentation selector> case since we have special fallback rule for the
+    // sequence. Note that we don't need to restrict this to already standardized variation
+    // sequences, since Unicode is adding variation sequences more frequently now and may even move
+    // towards allowing text and emoji variation selectors on any character.
+    if (variationSelector == TEXT_STYLE_VS) {
         for (size_t i = 0; i < mFamilies.size(); ++i) {
-            if (!mFamilies[i]->isColorEmojiFamily() && variationSelector == TEXT_STYLE_VS &&
-                mFamilies[i]->hasGlyph(baseCodepoint, 0)) {
+            if (!mFamilies[i]->isColorEmojiFamily() && mFamilies[i]->hasGlyph(baseCodepoint, 0)) {
                 return true;
             }
         }
@@ -395,7 +359,7 @@
         vector<Run>* result) const {
     const uint32_t langListId = style.getLanguageListId();
     int variant = style.getVariant();
-    FontFamily* lastFamily = NULL;
+    const FontFamily* lastFamily = nullptr;
     Run* run = NULL;
 
     if (string_size == 0) {
@@ -424,17 +388,17 @@
         if (lastFamily != nullptr) {
             if (isStickyWhitelisted(ch)) {
                 // Continue using existing font as long as it has coverage and is whitelisted
-                shouldContinueRun = lastFamily->getCoverage()->get(ch);
-            } else if (isVariationSelector(ch)) {
-                // Always continue if the character is a variation selector.
+                shouldContinueRun = lastFamily->getCoverage().get(ch);
+            } else if (ch == SOFT_HYPHEN || isVariationSelector(ch)) {
+                // Always continue if the character is the soft hyphen or a variation selector.
                 shouldContinueRun = true;
             }
         }
 
         if (!shouldContinueRun) {
-            FontFamily* family = getFamilyForChar(ch, isVariationSelector(nextCh) ? nextCh : 0,
-                    langListId, variant);
-            if (utf16Pos == 0 || family != lastFamily) {
+            const std::shared_ptr<FontFamily>& family = getFamilyForChar(
+                    ch, isVariationSelector(nextCh) ? nextCh : 0, langListId, variant);
+            if (utf16Pos == 0 || family.get() != lastFamily) {
                 size_t start = utf16Pos;
                 // Workaround for combining marks and emoji modifiers until we implement
                 // per-cluster font selection: if a combining mark or an emoji modifier is found in
@@ -444,7 +408,7 @@
                 if (utf16Pos != 0 &&
                         ((U_GET_GC_MASK(ch) & U_GC_M_MASK) != 0 ||
                          (isEmojiModifier(ch) && isEmojiBase(prevCh))) &&
-                        family && family->getCoverage()->get(prevCh)) {
+                        family != nullptr && family->getCoverage().get(prevCh)) {
                     const size_t prevChLength = U16_LENGTH(prevCh);
                     run->end -= prevChLength;
                     if (run->start == run->end) {
@@ -452,12 +416,9 @@
                     }
                     start -= prevChLength;
                 }
-                Run dummy;
-                result->push_back(dummy);
+                result->push_back({family->getClosestMatch(style), static_cast<int>(start), 0});
                 run = &result->back();
-                run->fakedFont = family->getClosestMatch(style);
-                lastFamily = family;
-                run->start = start;
+                lastFamily = family.get();
             }
         }
         prevCh = ch;
@@ -465,16 +426,43 @@
     } while (nextCh != kEndOfString);
 }
 
-MinikinFont* FontCollection::baseFont(FontStyle style) {
-    return baseFontFaked(style).font;
-}
-
 FakedFont FontCollection::baseFontFaked(FontStyle style) {
     return mFamilies[0]->getClosestMatch(style);
 }
 
+// Returns a new collection in which every family is replaced by the family returned from
+// createFamilyWithVariation() when that call yields one, and kept as-is otherwise. Returns
+// nullptr when |variations| is empty or none of its axis tags is among this collection's
+// supported axes.
+std::shared_ptr<FontCollection> FontCollection::createCollectionWithVariation(
+        const std::vector<FontVariation>& variations) {
+    if (variations.empty() || mSupportedAxes.empty()) {
+        return nullptr;
+    }
+
+    // Bail out unless at least one requested axis is supported by this font collection.
+    const bool anyAxisSupported = std::any_of(variations.begin(), variations.end(),
+            [this](const FontVariation& variation) {
+                return mSupportedAxes.count(variation.axisTag) != 0;
+            });
+    if (!anyAxisSupported) {
+        return nullptr;
+    }
+
+    std::vector<std::shared_ptr<FontFamily> > families;
+    for (const std::shared_ptr<FontFamily>& original : mFamilies) {
+        // Fall back to the existing family when no varied family is produced for it.
+        std::shared_ptr<FontFamily> varied = original->createFamilyWithVariation(variations);
+        families.push_back(varied ? varied : original);
+    }
+
+    return std::shared_ptr<FontCollection>(new FontCollection(families));
+}
+
 uint32_t FontCollection::getId() const {
     return mId;
 }
 
-}  // namespace android
+}  // namespace minikin

diff --git a/libs/minikin/FontFamily.cpp b/libs/minikin/FontFamily.cpp
index e2d86f0..a93cb4f 100644
--- a/libs/minikin/FontFamily.cpp
+++ b/libs/minikin/FontFamily.cpp

@@ -16,29 +16,29 @@
 
 #define LOG_TAG "Minikin"
 
-#include <cutils/log.h>
-#include <stdlib.h>
 #include <stdint.h>
+#include <stdlib.h>
 #include <string.h>
 
+#include <log/log.h>
+#include <utils/JenkinsHash.h>
+
 #include <hb.h>
 #include <hb-ot.h>
 
-#include <utils/JenkinsHash.h>
-
 #include "FontLanguage.h"
 #include "FontLanguageListCache.h"
+#include "FontUtils.h"
 #include "HbFontCache.h"
 #include "MinikinInternal.h"
-#include <minikin/MinikinFont.h>
-#include <minikin/AnalyzeStyle.h>
 #include <minikin/CmapCoverage.h>
+#include <minikin/MinikinFont.h>
 #include <minikin/FontFamily.h>
-#include <UniquePtr.h>
+#include <minikin/MinikinFont.h>
 
 using std::vector;
 
-namespace android {
+namespace minikin {
 
 FontStyle::FontStyle(int variant, int weight, bool italic)
         : FontStyle(FontLanguageListCache::kEmptyListId, variant, weight, italic) {
@@ -48,15 +48,15 @@
         : bits(pack(variant, weight, italic)), mLanguageListId(languageListId) {
 }
 
-hash_t FontStyle::hash() const {
-    uint32_t hash = JenkinsHashMix(0, bits);
-    hash = JenkinsHashMix(hash, mLanguageListId);
-    return JenkinsHashWhiten(hash);
+android::hash_t FontStyle::hash() const {
+    uint32_t hash = android::JenkinsHashMix(0, bits);
+    hash = android::JenkinsHashMix(hash, mLanguageListId);
+    return android::JenkinsHashWhiten(hash);
 }
 
 // static
 uint32_t FontStyle::registerLanguageList(const std::string& languages) {
-    AutoMutex _l(gMinikinLock);
+    android::AutoMutex _l(gMinikinLock);
     return FontLanguageListCache::getId(languages);
 }
 
@@ -65,45 +65,57 @@
     return (weight & kWeightMask) | (italic ? kItalicMask : 0) | (variant << kVariantShift);
 }
 
-FontFamily::FontFamily() : FontFamily(0 /* variant */) {
+Font::Font(const std::shared_ptr<MinikinFont>& typeface, FontStyle style)
+    : typeface(typeface), style(style) {
 }
 
-FontFamily::FontFamily(int variant) : FontFamily(FontLanguageListCache::kEmptyListId, variant) {
+Font::Font(std::shared_ptr<MinikinFont>&& typeface, FontStyle style)
+    : typeface(typeface), style(style) {
 }
 
-FontFamily::~FontFamily() {
-    for (size_t i = 0; i < mFonts.size(); i++) {
-        mFonts[i].typeface->UnrefLocked();
+std::unordered_set<AxisTag> Font::getSupportedAxesLocked() const {
+    const uint32_t fvarTag = MinikinFont::MakeTag('f', 'v', 'a', 'r');
+    HbBlob fvarTable(getFontTable(typeface.get(), fvarTag));
+    if (fvarTable.size() == 0) {
+        return std::unordered_set<AxisTag>();
     }
+
+    std::unordered_set<AxisTag> supportedAxes;
+    analyzeAxes(fvarTable.get(), fvarTable.size(), &supportedAxes);
+    return supportedAxes;
 }
 
-bool FontFamily::addFont(MinikinFont* typeface) {
-    AutoMutex _l(gMinikinLock);
+// Move constructor: takes over |o|'s typeface reference and copies its style.
+Font::Font(Font&& o)
+    : typeface(std::move(o.typeface)),  // moving a shared_ptr leaves the source null
+      style(o.style) {
+}
+
+// Copy constructor: shares |o|'s typeface (one more shared_ptr owner) and copies its style.
+Font::Font(const Font& o)
+    : typeface(o.typeface), style(o.style) {
+}
+
+// Convenience constructor: delegates to the (variant, fonts) overload with variant 0.
+FontFamily::FontFamily(std::vector<Font>&& fonts) : FontFamily(0 /* variant */, std::move(fonts)) {
+}
+// Convenience constructor: delegates with the empty language list id.
+FontFamily::FontFamily(int variant, std::vector<Font>&& fonts)
+    : FontFamily(FontLanguageListCache::kEmptyListId, variant, std::move(fonts)) {
+}
+// Takes ownership of |fonts|, then calls computeCoverage(), which acquires gMinikinLock.
+FontFamily::FontFamily(uint32_t langId, int variant, std::vector<Font>&& fonts)
+    : mLangId(langId), mVariant(variant), mFonts(std::move(fonts)) {
+    computeCoverage();
+}
+
+bool FontFamily::analyzeStyle(const std::shared_ptr<MinikinFont>& typeface, int* weight,
+        bool* italic) {
+    android::AutoMutex _l(gMinikinLock);
     const uint32_t os2Tag = MinikinFont::MakeTag('O', 'S', '/', '2');
-    HbBlob os2Table(getFontTable(typeface, os2Tag));
+    HbBlob os2Table(getFontTable(typeface.get(), os2Tag));
     if (os2Table.get() == nullptr) return false;
-    int weight;
-    bool italic;
-    if (analyzeStyle(os2Table.get(), os2Table.size(), &weight, &italic)) {
-        //ALOGD("analyzed weight = %d, italic = %s", weight, italic ? "true" : "false");
-        FontStyle style(weight, italic);
-        addFontLocked(typeface, style);
-        return true;
-    } else {
-        ALOGD("failed to analyze style");
-    }
-    return false;
-}
-
-void FontFamily::addFont(MinikinFont* typeface, FontStyle style) {
-    AutoMutex _l(gMinikinLock);
-    addFontLocked(typeface, style);
-}
-
-void FontFamily::addFontLocked(MinikinFont* typeface, FontStyle style) {
-    typeface->RefLocked();
-    mFonts.push_back(Font(typeface, style));
-    mCoverageValid = false;
+    return ::minikin::analyzeStyle(os2Table.get(), os2Table.size(), weight, italic);
 }
 
 // Compute a matching metric between two styles - 0 is an exact match
@@ -127,7 +139,7 @@
 }
 
 FakedFont FontFamily::getClosestMatch(FontStyle style) const {
-    const Font* bestFont = NULL;
+    const Font* bestFont = nullptr;
     int bestMatch = 0;
     for (size_t i = 0; i < mFonts.size(); i++) {
         const Font& font = mFonts[i];
@@ -137,81 +149,107 @@
             bestMatch = match;
         }
     }
-    FakedFont result;
-    if (bestFont == NULL) {
-        result.font = NULL;
-    } else {
-        result.font = bestFont->typeface;
-        result.fakery = computeFakery(style, bestFont->style);
+    if (bestFont != nullptr) {
+        return FakedFont{ bestFont->typeface.get(), computeFakery(style, bestFont->style) };
     }
-    return result;
-}
-
-size_t FontFamily::getNumFonts() const {
-    return mFonts.size();
-}
-
-MinikinFont* FontFamily::getFont(size_t index) const {
-    return mFonts[index].typeface;
-}
-
-FontStyle FontFamily::getStyle(size_t index) const {
-    return mFonts[index].style;
+    return FakedFont{ nullptr, FontFakery() };
 }
 
 bool FontFamily::isColorEmojiFamily() const {
     const FontLanguages& languageList = FontLanguageListCache::getById(mLangId);
     for (size_t i = 0; i < languageList.size(); ++i) {
-        if (languageList[i].hasEmojiFlag()) {
+        if (languageList[i].getEmojiStyle() == FontLanguage::EMSTYLE_EMOJI) {
             return true;
         }
     }
     return false;
 }
 
-const SparseBitSet* FontFamily::getCoverage() {
-    if (!mCoverageValid) {
-        const FontStyle defaultStyle;
-        MinikinFont* typeface = getClosestMatch(defaultStyle).font;
-        const uint32_t cmapTag = MinikinFont::MakeTag('c', 'm', 'a', 'p');
-        HbBlob cmapTable(getFontTable(typeface, cmapTag));
-        if (cmapTable.get() == nullptr) {
-            ALOGE("Could not get cmap table size!\n");
-            // Note: This means we will retry on the next call to getCoverage, as we can't store
-            //       the failure. This is fine, as we assume this doesn't really happen in practice.
-            return nullptr;
-        }
-        // TODO: Error check?
-        CmapCoverage::getCoverage(mCoverage, cmapTable.get(), cmapTable.size(), &mHasVSTable);
-#ifdef VERBOSE_DEBUG
-        ALOGD("font coverage length=%d, first ch=%x\n", mCoverage.length(),
-                mCoverage.nextSetBit(0));
-#endif
-        mCoverageValid = true;
+void FontFamily::computeCoverage() {
+    android::AutoMutex _l(gMinikinLock);
+    const FontStyle defaultStyle;
+    const MinikinFont* typeface = getClosestMatch(defaultStyle).font;
+    const uint32_t cmapTag = MinikinFont::MakeTag('c', 'm', 'a', 'p');
+    HbBlob cmapTable(getFontTable(typeface, cmapTag));
+    if (cmapTable.get() == nullptr) {
+        ALOGE("Could not get cmap table size!\n");
+        return;
     }
-    return &mCoverage;
+    mCoverage = CmapCoverage::getCoverage(cmapTable.get(), cmapTable.size(), &mCmapFmt14Coverage);
+
+    for (size_t i = 0; i < mFonts.size(); ++i) {
+        std::unordered_set<AxisTag> supportedAxes = mFonts[i].getSupportedAxesLocked();
+        mSupportedAxes.insert(supportedAxes.begin(), supportedAxes.end());
+    }
 }
 
-bool FontFamily::hasGlyph(uint32_t codepoint, uint32_t variationSelector) {
-    assertMinikinLocked();
-    if (variationSelector != 0 && !mHasVSTable) {
-        // Early exit if the variation selector is specified but the font doesn't have a cmap format
-        // 14 subtable.
+bool FontFamily::hasGlyph(uint32_t codepoint, uint32_t variationSelector) const {
+    if (variationSelector == 0) {
+        return mCoverage.get(codepoint);
+    }
+
+    if (mCmapFmt14Coverage.empty()) {
         return false;
     }
 
-    const FontStyle defaultStyle;
-    MinikinFont* minikinFont = getClosestMatch(defaultStyle).font;
-    hb_font_t* font = getHbFontLocked(minikinFont);
-    uint32_t unusedGlyph;
-    bool result = hb_font_get_glyph(font, codepoint, variationSelector, &unusedGlyph);
-    hb_font_destroy(font);
-    return result;
+    const uint16_t vsIndex = getVsIndex(variationSelector);
+
+    if (vsIndex >= mCmapFmt14Coverage.size()) {
+        // Even if vsIndex is INVALID_VS_INDEX, we reach here since INVALID_VS_INDEX is defined to
+        // be at the maximum end of the range.
+        return false;
+    }
+
+    const std::unique_ptr<SparseBitSet>& bitset = mCmapFmt14Coverage[vsIndex];
+    if (bitset.get() == nullptr) {
+        return false;
+    }
+
+    return bitset->get(codepoint);
 }
 
-bool FontFamily::hasVSTable() const {
-    LOG_ALWAYS_FATAL_IF(!mCoverageValid, "Do not call this method before getCoverage() call");
-    return mHasVSTable;
+std::shared_ptr<FontFamily> FontFamily::createFamilyWithVariation(
+        const std::vector<FontVariation>& variations) const {
+    if (variations.empty() || mSupportedAxes.empty()) {
+        return nullptr;
+    }
+
+    bool hasSupportedAxis = false;
+    for (const FontVariation& variation : variations) {
+        if (mSupportedAxes.find(variation.axisTag) != mSupportedAxes.end()) {
+            hasSupportedAxis = true;
+            break;
+        }
+    }
+    if (!hasSupportedAxis) {
+        // None of variation axes are supported by this family.
+        return nullptr;
+    }
+    // Per font: use the varied typeface when one can be created, else share the original.
+    std::vector<Font> fonts;
+    for (const Font& font : mFonts) {
+        bool supportedVariations = false;
+        android::AutoMutex _l(gMinikinLock);
+        std::unordered_set<AxisTag> supportedAxes = font.getSupportedAxesLocked();
+        if (!supportedAxes.empty()) {
+            for (const FontVariation& variation : variations) {
+                if (supportedAxes.find(variation.axisTag) != supportedAxes.end()) {
+                    supportedVariations = true;
+                    break;
+                }
+            }
+        }
+        std::shared_ptr<MinikinFont> minikinFont;
+        if (supportedVariations) {
+            minikinFont = font.typeface->createFontWithVariation(variations);
+        }
+        if (minikinFont == nullptr) {
+            minikinFont = font.typeface;
+        }
+        fonts.push_back(Font(std::move(minikinFont), font.style));
+    }
+
+    return std::shared_ptr<FontFamily>(new FontFamily(mLangId, mVariant, std::move(fonts)));
 }
 
-}  // namespace android
+}  // namespace minikin

diff --git a/libs/minikin/FontLanguage.cpp b/libs/minikin/FontLanguage.cpp
index bccb4bf..0897c06 100644
--- a/libs/minikin/FontLanguage.cpp
+++ b/libs/minikin/FontLanguage.cpp

@@ -18,43 +18,192 @@
 
 #include "FontLanguage.h"
 
+#include <algorithm>
 #include <hb.h>
+#include <string.h>
 #include <unicode/uloc.h>
 
-namespace android {
+namespace minikin {
 
 #define SCRIPT_TAG(c1, c2, c3, c4) \
         (((uint32_t)(c1)) << 24 | ((uint32_t)(c2)) << 16 | ((uint32_t)(c3)) <<  8 | \
          ((uint32_t)(c4)))
 
+// Returns true if |buf| (|bufLen| bytes) starts with |subtag| and the match is a complete
+// subtag, i.e. it is followed by the end of the buffer, a NUL, or a '-' / '_' delimiter.
+// Used to recognize the value of a "-u-em-" extension such as "emoji" or "text".
+static bool isEmojiSubtag(const char* buf, size_t bufLen, const char* subtag, size_t subtagLen) {
+    if (bufLen < subtagLen || strncmp(buf, subtag, subtagLen) != 0) {
+        return false;  // too short, or the leading bytes differ
+    }
+    if (bufLen == subtagLen) return true;  // nothing follows the subtag
+    const char following = buf[subtagLen];
+    return following == '\0' || following == '-' || following == '_';
+}
+
+// Packs a two- or three-character language or region code into the low 15 bits of a 16-bit
+// integer, 5 bits per character; the highest bit stays 0. A three-character region code is all
+// digits (10 possible values per character) and a language code is all lowercase letters (26
+// possible values), so 5 bits per character are sufficient in both cases.
+//
+// A two-character code stores 0x1f (all bits set) in the first 5-bit field as a marker. Any
+// |length| other than 2 is packed as a three-character code.
+static uint16_t packLanguageOrRegion(const char* c, size_t length, uint8_t twoLetterBase,
+        uint8_t threeLetterBase) {
+    if (length != 2) {
+        const uint16_t first = (uint16_t)(c[0] - threeLetterBase);
+        const uint16_t second = (uint16_t)(c[1] - threeLetterBase);
+        const uint16_t third = (uint16_t)(c[2] - threeLetterBase);
+        return (first << 10) | (second << 5) | third;
+    }
+    const uint16_t second = (uint16_t)(c[0] - twoLetterBase);
+    const uint16_t third = (uint16_t)(c[1] - twoLetterBase);
+    return 0x7c00u | (second << 5) | third;  // 0x7c00 == 0x1f << 10
+}
+
+// Decodes a value produced by packLanguageOrRegion into |out| (not NUL-terminated) and
+// returns the number of characters written: 2 when the top field is 0x1f, otherwise 3.
+static size_t unpackLanguageOrRegion(uint16_t in, char* out, uint8_t twoLetterBase,
+        uint8_t threeLetterBase) {
+    const uint8_t kFieldMask = 0x1f;  // each character occupies 5 bits
+    const uint8_t first = (in >> 10) & kFieldMask;
+    const bool isTwoLetter = (first == kFieldMask);
+    const uint8_t base = isTwoLetter ? twoLetterBase : threeLetterBase;
+
+    size_t written = 0;
+    if (!isTwoLetter) {
+        out[written++] = first + base;
+    }
+    out[written++] = ((in >> 5) & kFieldMask) + base;
+    out[written++] = (in & kFieldMask) + base;
+    return written;
+}
+
+// Returns the index of the first '-' or '_' in buffer[startOffset, bufferLength), or
+// bufferLength when that range contains no delimiter.
+static size_t nextDelimiterIndex(const char* buffer, size_t bufferLength, size_t startOffset) {
+    size_t pos = startOffset;
+    while (pos < bufferLength && buffer[pos] != '-' && buffer[pos] != '_') {
+        ++pos;
+    }
+    return pos < bufferLength ? pos : bufferLength;  // clamp if startOffset is past the end
+}
+
+static inline bool isLowercase(char c) {  // 'a'..'z' range check, locale-independent
+    return c >= 'a' && c <= 'z';
+}
+
+static inline bool isUppercase(char c) {  // 'A'..'Z' range check, locale-independent
+    return c >= 'A' && c <= 'Z';
+}
+
+static inline bool isDigit(char c) {  // '0'..'9' range check, locale-independent
+    return c >= '0' && c <= '9';
+}
+
+// Returns true if buffer[0, length) is a two- or three-letter code of lowercase letters.
+static inline bool isValidLanguageCode(const char* buffer, size_t length) {
+    if (length < 2 || length > 3) return false;
+    for (size_t i = 0; i < length; ++i) {
+        if (!isLowercase(buffer[i])) return false;
+    }
+    return true;
+}
+
+// Script codes are one uppercase letter followed by three lowercase; reads buffer[0..3].
+static inline bool isValidScriptCode(const char* buffer) {
+    return isUppercase(buffer[0]) &&
+            isLowercase(buffer[1]) && isLowercase(buffer[2]) && isLowercase(buffer[3]);
+}
+
+// Region codes are either two uppercase letters or three digits.
+static inline bool isValidRegionCode(const char* buffer, size_t length) {
+    if (length == 2) return isUppercase(buffer[0]) && isUppercase(buffer[1]);
+    return length == 3 && isDigit(buffer[0]) && isDigit(buffer[1]) && isDigit(buffer[2]);
+}
+
 // Parse BCP 47 language identifier into internal structure
 FontLanguage::FontLanguage(const char* buf, size_t length) : FontLanguage() {
-    size_t i;
-    for (i = 0; i < length; i++) {
-        char c = buf[i];
-        if (c == '-' || c == '_') break;
-    }
-    if (i == 2 || i == 3) {  // only accept two or three letter language code.
-        mLanguage = buf[0] | (buf[1] << 8) | ((i == 3) ? (buf[2] << 16) : 0);
+    size_t firstDelimiterPos = nextDelimiterIndex(buf, length, 0);
+    if (isValidLanguageCode(buf, firstDelimiterPos)) {
+        mLanguage = packLanguageOrRegion(buf, firstDelimiterPos, 'a', 'a');
     } else {
         // We don't understand anything other than two-letter or three-letter
         // language codes, so we skip parsing the rest of the string.
-        mLanguage = 0ul;
         return;
     }
 
-    size_t next;
-    for (i++; i < length; i = next + 1) {
-        for (next = i; next < length; next++) {
-            char c = buf[next];
-            if (c == '-' || c == '_') break;
+    if (firstDelimiterPos == length) {
+        mHbLanguage = hb_language_from_string(getString().c_str(), -1);
+        return;  // Language code only.
+    }
+
+    size_t nextComponentStartPos = firstDelimiterPos + 1;
+    size_t nextDelimiterPos = nextDelimiterIndex(buf, length, nextComponentStartPos);
+    size_t componentLength = nextDelimiterPos - nextComponentStartPos;
+
+    if (componentLength == 4) {
+        // Possibly script code.
+        const char* p = buf + nextComponentStartPos;
+        if (isValidScriptCode(p)) {
+            mScript = SCRIPT_TAG(p[0], p[1], p[2], p[3]);
+            mSubScriptBits = scriptToSubScriptBits(mScript);
         }
-        if (next - i == 4 && 'A' <= buf[i] && buf[i] <= 'Z') {
-            mScript = SCRIPT_TAG(buf[i], buf[i + 1], buf[i + 2], buf[i + 3]);
+
+        if (nextDelimiterPos == length) {
+            mHbLanguage = hb_language_from_string(getString().c_str(), -1);
+            mEmojiStyle = resolveEmojiStyle(buf, length, mScript);
+            return;  // No region code.
+        }
+
+        nextComponentStartPos = nextDelimiterPos + 1;
+        nextDelimiterPos = nextDelimiterIndex(buf, length, nextComponentStartPos);
+        componentLength = nextDelimiterPos - nextComponentStartPos;
+    }
+
+    if (componentLength == 2 || componentLength == 3) {
+        // Possibly region code.
+        const char* p = buf + nextComponentStartPos;
+        if (isValidRegionCode(p, componentLength)) {
+            mRegion = packLanguageOrRegion(p, componentLength, 'A', '0');
         }
     }
 
-    mSubScriptBits = scriptToSubScriptBits(mScript);
+    mHbLanguage = hb_language_from_string(getString().c_str(), -1);
+    mEmojiStyle = resolveEmojiStyle(buf, length, mScript);
+}
+
+// static
+FontLanguage::EmojiStyle FontLanguage::resolveEmojiStyle(const char* buf, size_t length,
+        uint32_t script) {
+    // An explicit "-u-em-<style>" extension takes priority over the script. "-u-em-text"
+    // (10 characters) is the shortest such extension, so the search is skipped for any
+    // shorter tag.
+    const size_t kMinSubtagLength = 10;
+    if (length >= kMinSubtagLength) {
+        static const char kPrefix[] = "-u-em-";
+        const size_t prefixLength = strlen(kPrefix);
+        const char* const end = buf + length;
+        const char* const match = std::search(buf, end, kPrefix, kPrefix + prefixLength);
+        if (match != end) {  // found
+            const char* const value = match + prefixLength;
+            const size_t remainingLength = end - value;
+            if (isEmojiSubtag(value, remainingLength, "emoji", 5)) return EMSTYLE_EMOJI;
+            if (isEmojiSubtag(value, remainingLength, "text", 4)) return EMSTYLE_TEXT;
+            if (isEmojiSubtag(value, remainingLength, "default", 7)) return EMSTYLE_DEFAULT;
+            // Unrecognized value: fall through to the script-based lookup.
+        }
+    }
+
+    // No usable extension: derive the style from the script code.
+    switch (script) {
+        case SCRIPT_TAG('Z', 's', 'y', 'e'):
+            return EMSTYLE_EMOJI;
+        case SCRIPT_TAG('Z', 's', 'y', 'm'):
+            return EMSTYLE_TEXT;
+        default:
+            return EMSTYLE_EMPTY;
+    }
 }
 
 //static
@@ -95,29 +244,26 @@
         case SCRIPT_TAG('K', 'o', 'r', 'e'):
             subScriptBits = kHanFlag | kHangulFlag;
             break;
-        case SCRIPT_TAG('Z', 's', 'y', 'e'):
-            subScriptBits = kEmojiFlag;
-            break;
     }
     return subScriptBits;
 }
 
 std::string FontLanguage::getString() const {
-    if (mLanguage == 0ul) {
+    if (isUnsupported()) {
         return "und";
     }
     char buf[16];
-    size_t i = 0;
-    buf[i++] = mLanguage & 0xFF ;
-    buf[i++] = (mLanguage >> 8) & 0xFF;
-    char third_letter = (mLanguage >> 16) & 0xFF;
-    if (third_letter != 0) buf[i++] = third_letter;
+    size_t i = unpackLanguageOrRegion(mLanguage, buf, 'a', 'a');
     if (mScript != 0) {
-      buf[i++] = '-';
-      buf[i++] = (mScript >> 24) & 0xFFu;
-      buf[i++] = (mScript >> 16) & 0xFFu;
-      buf[i++] = (mScript >> 8) & 0xFFu;
-      buf[i++] = mScript & 0xFFu;
+        buf[i++] = '-';
+        buf[i++] = (mScript >> 24) & 0xFFu;
+        buf[i++] = (mScript >> 16) & 0xFFu;
+        buf[i++] = (mScript >> 8) & 0xFFu;
+        buf[i++] = mScript & 0xFFu;
+    }
+    if (mRegion != INVALID_CODE) {
+        buf[i++] = '-';
+        i += unpackLanguageOrRegion(mRegion, buf + i, 'A', '0');
     }
     return std::string(buf, i);
 }
@@ -139,28 +285,41 @@
 }
 
 int FontLanguage::calcScoreFor(const FontLanguages& supported) const {
-    int score = 0;
+    bool languageScriptMatch = false;
+    bool subtagMatch = false;
+    bool scriptMatch = false;
+
     for (size_t i = 0; i < supported.size(); ++i) {
+        if (mEmojiStyle != EMSTYLE_EMPTY &&
+               mEmojiStyle == supported[i].mEmojiStyle) {
+            subtagMatch = true;
+            if (mLanguage == supported[i].mLanguage) {
+                return 4;
+            }
+        }
         if (isEqualScript(supported[i]) ||
                 supportsScript(supported[i].mSubScriptBits, mSubScriptBits)) {
+            scriptMatch = true;
             if (mLanguage == supported[i].mLanguage) {
-                return 2;
-            } else {
-                score = 1;
+                languageScriptMatch = true;
             }
         }
     }
 
-    if (score == 1) {
-        return score;
-    }
-
     if (supportsScript(supported.getUnionOfSubScriptBits(), mSubScriptBits)) {
-        // Gives score of 2 only if the language matches all of the font languages except for the
-        // exact match case handled above.
-        return (mLanguage == supported[0].mLanguage && supported.isAllTheSameLanguage()) ? 2 : 1;
+        scriptMatch = true;
+        if (mLanguage == supported[0].mLanguage && supported.isAllTheSameLanguage()) {
+            return 3;
+        }
     }
 
+    if (languageScriptMatch) {
+        return 3;
+    } else if (subtagMatch) {
+        return 2;
+    } else if (scriptMatch) {
+        return 1;
+    }
     return 0;
 }
 
@@ -183,4 +342,4 @@
 }
 
 #undef SCRIPT_TAG
-}  // namespace android
+}  // namespace minikin

diff --git a/libs/minikin/FontLanguage.h b/libs/minikin/FontLanguage.h
index f944174..6a50b1d 100644
--- a/libs/minikin/FontLanguage.h
+++ b/libs/minikin/FontLanguage.h

@@ -22,11 +22,15 @@
 
 #include <hb.h>
 
-namespace android {
+namespace minikin {
 
-// Due to the limits in font fallback score calculation, we can't use anything more than 17
+// Due to the limits in font fallback score calculation, we can't use anything more than 12
 // languages.
-const size_t FONT_LANGUAGES_LIMIT = 17;
+const size_t FONT_LANGUAGES_LIMIT = 12;
+
+// The language or region code is encoded to 15 bits.
+const uint16_t INVALID_CODE = 0x7fff;
+
 class FontLanguages;
 
 // FontLanguage is a compact representation of a BCP 47 language tag. It
@@ -34,22 +38,37 @@
 // font rendering.
 struct FontLanguage {
 public:
+    enum EmojiStyle : uint8_t {
+        EMSTYLE_EMPTY = 0,
+        EMSTYLE_DEFAULT = 1,
+        EMSTYLE_EMOJI = 2,
+        EMSTYLE_TEXT = 3,
+    };
     // Default constructor creates the unsupported language.
-    FontLanguage() : mScript(0ul), mLanguage(0ul), mSubScriptBits(0ul) {}
+    FontLanguage()
+            : mScript(0ul),
+            mLanguage(INVALID_CODE),
+            mRegion(INVALID_CODE),
+            mHbLanguage(HB_LANGUAGE_INVALID),
+            mSubScriptBits(0ul),
+            mEmojiStyle(EMSTYLE_EMPTY) {}
 
     // Parse from string
     FontLanguage(const char* buf, size_t length);
 
     bool operator==(const FontLanguage other) const {
-        return !isUnsupported() && isEqualScript(other) && mLanguage == other.mLanguage;
+        return !isUnsupported() && isEqualScript(other) && mLanguage == other.mLanguage &&
+                mRegion == other.mRegion && mEmojiStyle == other.mEmojiStyle;
     }
 
     bool operator!=(const FontLanguage other) const {
         return !(*this == other);
     }
 
-    bool isUnsupported() const { return mLanguage == 0ul; }
-    bool hasEmojiFlag() const { return mSubScriptBits & kEmojiFlag; }
+    bool isUnsupported() const { return mLanguage == INVALID_CODE; }
+    EmojiStyle getEmojiStyle() const { return mEmojiStyle; }
+    hb_language_t getHbLanguage() const { return mHbLanguage; }
+
 
     bool isEqualScript(const FontLanguage& other) const;
 
@@ -64,7 +83,10 @@
     // 0 = no match, 1 = script match, 2 = script and primary language match.
     int calcScoreFor(const FontLanguages& supported) const;
 
-    uint64_t getIdentifier() const { return (uint64_t)mScript << 32 | (uint64_t)mLanguage; }
+    uint64_t getIdentifier() const {
+        return ((uint64_t)mLanguage << 49) | ((uint64_t)mScript << 17) | ((uint64_t)mRegion << 2) |
+                mEmojiStyle;
+    }
 
 private:
     friend class FontLanguages;  // for FontLanguages constructor
@@ -73,23 +95,33 @@
     uint32_t mScript;
 
     // ISO 639-1 or ISO 639-2 compliant language code.
-    // The two or three letter language code is packed into 32 bit integer.
+    // The two- or three-letter language code is packed into a 15 bit integer.
     // mLanguage = 0 means the FontLanguage is unsupported.
-    uint32_t mLanguage;
+    uint16_t mLanguage;
 
-    // For faster comparing, use 8 bits for specific scripts.
+    // ISO 3166-1 or UN M.49 compliant region code. The two-letter or three-digit region code is
+    // packed into a 15 bit integer.
+    uint16_t mRegion;
+
+    // The language to be passed to the HarfBuzz shaper.
+    hb_language_t mHbLanguage;
+
+    // For faster comparing, use 7 bits for specific scripts.
     static const uint8_t kBopomofoFlag = 1u;
-    static const uint8_t kEmojiFlag = 1u << 1;
-    static const uint8_t kHanFlag = 1u << 2;
-    static const uint8_t kHangulFlag = 1u << 3;
-    static const uint8_t kHiraganaFlag = 1u << 4;
-    static const uint8_t kKatakanaFlag = 1u << 5;
-    static const uint8_t kSimplifiedChineseFlag = 1u << 6;
-    static const uint8_t kTraditionalChineseFlag = 1u << 7;
+    static const uint8_t kHanFlag = 1u << 1;
+    static const uint8_t kHangulFlag = 1u << 2;
+    static const uint8_t kHiraganaFlag = 1u << 3;
+    static const uint8_t kKatakanaFlag = 1u << 4;
+    static const uint8_t kSimplifiedChineseFlag = 1u << 5;
+    static const uint8_t kTraditionalChineseFlag = 1u << 6;
     uint8_t mSubScriptBits;
 
+    EmojiStyle mEmojiStyle;
+
     static uint8_t scriptToSubScriptBits(uint32_t script);
 
+    static EmojiStyle resolveEmojiStyle(const char* buf, size_t length, uint32_t script);
+
     // Returns true if the provide subscript bits has the requested subscript bits.
     // Note that this function returns false if the requested subscript bits are empty.
     static bool supportsScript(uint8_t providedBits, uint8_t requestedBits);
@@ -98,7 +130,7 @@
 // An immutable list of languages.
 class FontLanguages {
 public:
-    FontLanguages(std::vector<FontLanguage>&& languages);
+    explicit FontLanguages(std::vector<FontLanguage>&& languages);
     FontLanguages() : mUnionOfSubScriptBits(0), mIsAllTheSameLanguage(false) {}
     FontLanguages(FontLanguages&&) = default;
 
@@ -121,6 +153,6 @@
     void operator=(const FontLanguages&) = delete;
 };
 
-}  // namespace android
+}  // namespace minikin
 
 #endif  // MINIKIN_FONT_LANGUAGE_H

diff --git a/libs/minikin/FontLanguageListCache.cpp b/libs/minikin/FontLanguageListCache.cpp
index 6b661f0..f1e14f0 100644
--- a/libs/minikin/FontLanguageListCache.cpp
+++ b/libs/minikin/FontLanguageListCache.cpp

@@ -18,14 +18,15 @@
 
 #include "FontLanguageListCache.h"
 
-#include <cutils/log.h>
 #include <unicode/uloc.h>
 #include <unordered_set>
 
-#include "MinikinInternal.h"
-#include "FontLanguage.h"
+#include <log/log.h>
 
-namespace android {
+#include "FontLanguage.h"
+#include "MinikinInternal.h"
+
+namespace minikin {
 
 const uint32_t FontLanguageListCache::kEmptyListId;
 
@@ -152,4 +153,4 @@
     return instance;
 }
 
-}  // namespace android
+}  // namespace minikin

diff --git a/libs/minikin/FontLanguageListCache.h b/libs/minikin/FontLanguageListCache.h
index c961882..9bf156f 100644
--- a/libs/minikin/FontLanguageListCache.h
+++ b/libs/minikin/FontLanguageListCache.h

@@ -22,7 +22,7 @@
 #include <minikin/FontFamily.h>
 #include "FontLanguage.h"
 
-namespace android {
+namespace minikin {
 
 class FontLanguageListCache {
 public:
@@ -51,6 +51,6 @@
     std::unordered_map<std::string, uint32_t> mLanguageListLookupTable;
 };
 
-}  // namespace android
+}  // namespace minikin
 
 #endif  // MINIKIN_FONT_LANGUAGE_LIST_CACHE_H

diff --git a/libs/minikin/FontUtils.cpp b/libs/minikin/FontUtils.cpp
new file mode 100644
index 0000000..c5a32f8
--- /dev/null
+++ b/libs/minikin/FontUtils.cpp

@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdlib.h>
+#include <stdint.h>
+
+#include "FontUtils.h"
+
+namespace minikin {
+
+// Big-endian readers for font table data; the caller must bounds-check |offset| beforehand.
+static uint16_t readU16(const uint8_t* data, size_t offset) {
+    const uint8_t* p = data + offset;
+    return (p[0] << 8) | p[1];
+}
+static uint32_t readU32(const uint8_t* data, size_t offset) {
+    return ((uint32_t)readU16(data, offset) << 16) | readU16(data, offset + 2);
+}
+
+// Reads weight (usWeightClass / 100) and the italic flag (fsSelection bit 0) from an OS/2
+// table. Returns false, leaving the outputs untouched, when the table is too short.
+bool analyzeStyle(const uint8_t* os2_data, size_t os2_size, int* weight, bool* italic) {
+    const size_t kUsWeightClassOffset = 4;
+    const size_t kFsSelectionOffset = 62;
+    const uint16_t kItalicFlag = (1 << 0);
+    if (os2_size < kFsSelectionOffset + sizeof(uint16_t)) {
+        return false;
+    }
+    *weight = readU16(os2_data, kUsWeightClassOffset) / 100;
+    *italic = (readU16(os2_data, kFsSelectionOffset) & kItalicFlag) != 0;
+    return true;
+}
+
+// Collects the axis tags of an OpenType 'fvar' table into |axes|, which is cleared first.
+// |axes| is left empty if the table is truncated or is not version 1.0 with the usual layout.
+void analyzeAxes(const uint8_t* fvar_data, size_t fvar_size, std::unordered_set<uint32_t>* axes) {
+    const size_t kMajorVersionOffset = 0;
+    const size_t kMinorVersionOffset = 2;
+    const size_t kOffsetToAxesArrayOffset = 4;
+    const size_t kAxisCountOffset = 8;
+    const size_t kAxisSizeOffset = 10;
+
+    axes->clear();
+    if (fvar_size < kAxisSizeOffset + 2) {
+        return;  // Too short to hold the header fields read below.
+    }
+    const uint16_t majorVersion = readU16(fvar_data, kMajorVersionOffset);
+    const uint16_t minorVersion = readU16(fvar_data, kMinorVersionOffset);
+    const uint32_t axisOffset = readU16(fvar_data, kOffsetToAxesArrayOffset);
+    const uint32_t axisCount = readU16(fvar_data, kAxisCountOffset);
+    const uint32_t axisSize = readU16(fvar_data, kAxisSizeOffset);
+
+    if (majorVersion != 1 || minorVersion != 0 || axisOffset != 0x10 || axisSize != 0x14) {
+        return;  // Unsupported version.
+    }
+    // The axis array occupies axisCount records of axisSize bytes starting at axisOffset.
+    if (fvar_size < axisOffset + axisSize * axisCount) {
+        return;  // Invalid table size.
+    }
+    for (uint32_t i = 0; i < axisCount; ++i) {
+        axes->insert(readU32(fvar_data, axisOffset + i * axisSize));  // record starts with tag
+    }
+}
+}  // namespace minikin

diff --git a/include/minikin/AnalyzeStyle.h b/libs/minikin/FontUtils.h
similarity index 70%
rename from include/minikin/AnalyzeStyle.h
rename to libs/minikin/FontUtils.h
index 2989477..d26d5e4 100644
--- a/include/minikin/AnalyzeStyle.h
+++ b/libs/minikin/FontUtils.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013 The Android Open Source Project
+ * Copyright (C) 2017 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,13 +14,16 @@
  * limitations under the License.
  */
 
-#ifndef MINIKIN_ANALYZE_STYLE_H
-#define MINIKIN_ANALYZE_STYLE_H
+#ifndef MINIKIN_FONT_UTILS_H
+#define MINIKIN_FONT_UTILS_H
 
-namespace android {
+#include <unordered_set>
+
+namespace minikin {
 
 bool analyzeStyle(const uint8_t* os2_data, size_t os2_size, int* weight, bool* italic);
+void analyzeAxes(const uint8_t* fvar_data, size_t fvar_size, std::unordered_set<uint32_t>* axes);
 
-}  // namespace android
+}  // namespace minikin
 
-#endif  // MINIKIN_ANALYZE_STYLE_H
\ No newline at end of file
+#endif  // MINIKIN_FONT_UTILS_H

diff --git a/libs/minikin/GraphemeBreak.cpp b/libs/minikin/GraphemeBreak.cpp
index 45dd0ff..87de421 100644
--- a/libs/minikin/GraphemeBreak.cpp
+++ b/libs/minikin/GraphemeBreak.cpp

@@ -20,9 +20,10 @@
 #include <unicode/utf16.h>
 
 #include <minikin/GraphemeBreak.h>
+#include <minikin/Emoji.h>
 #include "MinikinInternal.h"
 
-namespace android {
+namespace minikin {
 
 int32_t tailoredGraphemeClusterBreak(uint32_t c) {
     // Characters defined as Control that we want to treat them as Extend.
@@ -39,17 +40,6 @@
             || c == 0xFEFF                  // BOM
             || ((c | 0x7F) == 0xE007F))     // recently undeprecated tag characters in Plane 14
         return U_GCB_EXTEND;
-    // UTC-approved characters for the Prepend class, per
-    // http://www.unicode.org/L2/L2015/15183r-graph-cluster-brk.txt
-    // These should be removed when our copy of ICU gets updated to Unicode 9.0 (~2016 or 2017).
-    else if ((0x0600 <= c && c <= 0x0605) // Arabic subtending marks
-            || c == 0x06DD                // ARABIC SUBTENDING MARK
-            || c == 0x070F                // SYRIAC ABBREVIATION MARK
-            || c == 0x0D4E                // MALAYALAM LETTER DOT REPH
-            || c == 0x110BD               // KAITHI NUMBER SIGN
-            || c == 0x111C2               // SHARADA SIGN JIHVAMULIYA
-            || c == 0x111C3)              // SHARADA SIGN UPADHMANIYA
-        return U_GCB_PREPEND;
     // THAI CHARACTER SARA AM is treated as a normal letter by most other implementations: they
     // allow a grapheme break before it.
     else if (c == 0x0E33)
@@ -59,15 +49,15 @@
 }
 
 // Returns true for all characters whose IndicSyllabicCategory is Pure_Killer.
-// From http://www.unicode.org/Public/8.0.0/ucd/IndicSyllabicCategory.txt
+// From http://www.unicode.org/Public/9.0.0/ucd/IndicSyllabicCategory.txt
 bool isPureKiller(uint32_t c) {
     return (c == 0x0E3A || c == 0x0E4E || c == 0x0F84 || c == 0x103A || c == 0x1714 || c == 0x1734
             || c == 0x17D1 || c == 0x1BAA || c == 0x1BF2 || c == 0x1BF3 || c == 0xA806
             || c == 0xA953 || c == 0xABED || c == 0x11134 || c == 0x112EA || c == 0x1172B);
 }
 
-bool GraphemeBreak::isGraphemeBreak(const uint16_t* buf, size_t start, size_t count,
-        size_t offset) {
+bool GraphemeBreak::isGraphemeBreak(const float* advances, const uint16_t* buf, size_t start,
+        size_t count, const size_t offset) {
     // This implementation closely follows Unicode Standard Annex #29 on
     // Unicode Text Segmentation (http://www.unicode.org/reports/tr29/),
     // implementing a tailored version of extended grapheme clusters.
@@ -84,8 +74,9 @@
     uint32_t c1 = 0;
     uint32_t c2 = 0;
     size_t offset_back = offset;
+    size_t offset_forward = offset;
     U16_PREV(buf, start, offset_back, c1);
-    U16_NEXT(buf, offset, start + count, c2);
+    U16_NEXT(buf, offset_forward, start + count, c2);
     int32_t p1 = tailoredGraphemeClusterBreak(c1);
     int32_t p2 = tailoredGraphemeClusterBreak(c2);
     // Rule GB3, CR x LF
@@ -112,73 +103,107 @@
     if ((p1 == U_GCB_LVT || p1 == U_GCB_T) && p2 == U_GCB_T) {
         return false;
     }
-    // Rule GB8a that looks at even-off cases.
-    //
-    // sot   (RI RI)*  RI x RI
-    // [^RI] (RI RI)*  RI x RI
-    //                 RI ÷ RI
-    if (p1 == U_GCB_REGIONAL_INDICATOR && p2 == U_GCB_REGIONAL_INDICATOR) {
-        // Look at up to 1000 code units.
-        start = std::max((ssize_t)start, (ssize_t)offset_back - 1000);
-        while (offset_back > start) {
-            U16_PREV(buf, start, offset_back, c1);
-            if (tailoredGraphemeClusterBreak(c1) != U_GCB_REGIONAL_INDICATOR) {
-                offset_back += U16_LENGTH(c1);
-                break;
-            }
-        }
+    // Rule GB9, x (Extend | ZWJ); Rule GB9a, x SpacingMark; Rule GB9b, Prepend x
+    if (p2 == U_GCB_EXTEND || p2 == U_GCB_ZWJ || p2 == U_GCB_SPACING_MARK || p1 == U_GCB_PREPEND) {
+        return false;
+    }
 
-        // Note that the offset has moved forwared 2 code units by U16_NEXT.
-        // The number 4 comes from the number of code units in a whole flag.
-        return (offset - 2 - offset_back) % 4 == 0;
+    // This is used to decide font-dependent grapheme clusters. If we don't have the advance
+    // information, we become conservative in grapheme breaking and assume that it has no advance.
+    const bool c2_has_advance = (advances != nullptr && advances[offset - start] != 0.0);
+
+    // All the following rules are font-dependent, in the way that if we know c2 has an advance,
+    // we definitely know that it cannot form a grapheme with the character(s) before it. So we
+    // make the decision in favor of a grapheme break early.
+    if (c2_has_advance) {
+        return true;
     }
-    // Rule GB9, x Extend; Rule GB9a, x SpacingMark; Rule GB9b, Prepend x
-    if (p2 == U_GCB_EXTEND || p2 == U_GCB_SPACING_MARK || p1 == U_GCB_PREPEND) {
-        return false;
-    }
-    // Cluster indic syllables together (tailoring of UAX #29)
-    // Known limitation: this is overly conservative, and assumes that the virama may form a
-    // conjunct with the following letter, which doesn't always happen.
+
+    // Note: For Rule GB10 and GB11 below, we do not use the Unicode line breaking properties for
+    // determining emoji-ness and carry our own data, because our data could be more fresh than what
+    // ICU provides.
     //
-    // There is no easy solution to do this correctly. Even querying the font does not help (with
-    // the current font technoloies), since the font may be creating the conjunct using multiple
-    // glyphs, while the user may be perceiving that sequence of glyphs as one conjunct or one
-    // letter.
-    if (u_getIntPropertyValue(c1, UCHAR_CANONICAL_COMBINING_CLASS) == 9  // virama
-            && !isPureKiller(c1)
-            && u_getIntPropertyValue(c2, UCHAR_GENERAL_CATEGORY) == U_OTHER_LETTER) {
-        return false;
+    // Tailored version of Rule GB10, (E_Base | EBG) Extend* × E_Modifier.
+    // The rule itself says do not break between emoji base and emoji modifiers, skipping all Extend
+    // characters. Variation selectors are considered Extend, so they are handled fine.
+    //
+    // We tailor this by requiring that an actual ligature is formed. If the font doesn't form a
+    // ligature, we allow a break before the modifier.
+    if (isEmojiModifier(c2)) {
+        uint32_t c0 = c1;
+        size_t offset_backback = offset_back;
+        int32_t p0 = p1;
+        if (p0 == U_GCB_EXTEND && offset_backback > start) {
+            // skip over emoji variation selector
+            U16_PREV(buf, start, offset_backback, c0);
+            p0 = tailoredGraphemeClusterBreak(c0);
+        }
+        if (isEmojiBase(c0)) {
+            return false;
+        }
     }
-    // Tailoring: make emoji sequences with ZWJ a single grapheme cluster
-    if (c1 == 0x200D && isEmoji(c2) && offset_back > start) {
+    // Tailored version of Rule GB11, ZWJ × (Glue_After_Zwj | EBG)
+    // We try to make emoji sequences with ZWJ a single grapheme cluster, but only if they actually
+    // merge to one cluster. So we are more relaxed than the UAX #29 rules in accepting any emoji
+    // character after the ZWJ, but are tighter in that we only treat it as one cluster if a
+    // ligature is actually formed and we also require the character before the ZWJ to also be an
+    // emoji.
+    if (p1 == U_GCB_ZWJ && isEmoji(c2) && offset_back > start) {
         // look at character before ZWJ to see that both can participate in an emoji zwj sequence
         uint32_t c0 = 0;
-        U16_PREV(buf, start, offset_back, c0);
-        if (c0 == 0xFE0F && offset_back > start) {
+        size_t offset_backback = offset_back;
+        U16_PREV(buf, start, offset_backback, c0);
+        if (c0 == 0xFE0F && offset_backback > start) {
             // skip over emoji variation selector
-            U16_PREV(buf, start, offset_back, c0);
+            U16_PREV(buf, start, offset_backback, c0);
         }
         if (isEmoji(c0)) {
             return false;
         }
     }
-    // Proposed Rule GB9c from http://www.unicode.org/L2/L2016/16011r3-break-prop-emoji.pdf
-    // E_Base x E_Modifier
-    if (isEmojiModifier(c2)) {
-        if (c1 == 0xFE0F && offset_back > start) {
-            // skip over emoji variation selector
-            U16_PREV(buf, start, offset_back, c1);
-        }
-        if (isEmojiBase(c1)) {
+    // Tailored version of Rule GB12 and Rule GB13 that look at even-odd cases.
+    // sot   (RI RI)*  RI x RI
+    // [^RI] (RI RI)*  RI x RI
+    //
+    // If we have font information, we have already allowed a break above whenever the second
+    // character had an advance; reaching here means it has none, i.e. a ligature was formed.
+    // If we don't, we look back like UAX #29 recommends, but only up to 1000 code units.
+    if (p1 == U_GCB_REGIONAL_INDICATOR && p2 == U_GCB_REGIONAL_INDICATOR) {
+        if (advances != nullptr) {
+            // We have advances information. But if we are here, we already know c2 has no advance.
+            // So we should definitely disallow a break.
             return false;
+        } else {
+            // Look at up to 1000 code units.
+            const size_t lookback_barrier = std::max((ssize_t)start, (ssize_t)offset_back - 1000);
+            size_t offset_backback = offset_back;
+            while (offset_backback > lookback_barrier) {
+                uint32_t c0 = 0;
+                U16_PREV(buf, lookback_barrier, offset_backback, c0);
+                if (tailoredGraphemeClusterBreak(c0) != U_GCB_REGIONAL_INDICATOR) {
+                    offset_backback += U16_LENGTH(c0);
+                    break;
+                }
+            }
+            // The number 4 comes from the number of code units in a whole flag.
+            return (offset - offset_backback) % 4 == 0;
         }
     }
-    // Rule GB10, Any ÷ Any
+    // Cluster Indic syllables together (tailoring of UAX #29).
+    // Immediately after each virama (that is not just a pure killer) followed by a letter, we
+    // disallow grapheme breaks (if we are here, we don't know about advances, or we already know
+    // that c2 has no advance).
+    if (u_getIntPropertyValue(c1, UCHAR_CANONICAL_COMBINING_CLASS) == 9  // virama
+            && !isPureKiller(c1)
+            && u_getIntPropertyValue(c2, UCHAR_GENERAL_CATEGORY) == U_OTHER_LETTER) {
+        return false;
+    }
+    // Rule GB999, Any ÷ Any
     return true;
 }
 
-size_t GraphemeBreak::getTextRunCursor(const uint16_t* buf, size_t start, size_t count,
-        size_t offset, MoveOpt opt) {
+size_t GraphemeBreak::getTextRunCursor(const float* advances, const uint16_t* buf, size_t start,
+        size_t count, size_t offset, MoveOpt opt) {
     switch (opt) {
     case AFTER:
         if (offset < start + count) {
@@ -186,7 +211,7 @@
         }
         // fall through
     case AT_OR_AFTER:
-        while (!isGraphemeBreak(buf, start, count, offset)) {
+        while (!isGraphemeBreak(advances, buf, start, count, offset)) {
             offset++;
         }
         break;
@@ -196,12 +221,12 @@
         }
         // fall through
     case AT_OR_BEFORE:
-        while (!isGraphemeBreak(buf, start, count, offset)) {
+        while (!isGraphemeBreak(advances, buf, start, count, offset)) {
             offset--;
         }
         break;
     case AT:
-        if (!isGraphemeBreak(buf, start, count, offset)) {
+        if (!isGraphemeBreak(advances, buf, start, count, offset)) {
             offset = (size_t)-1;
         }
         break;
@@ -209,4 +234,4 @@
     return offset;
 }
 
-}  // namespace android
+}  // namespace minikin

diff --git a/libs/minikin/HbFontCache.cpp b/libs/minikin/HbFontCache.cpp
index 3be942d..af3d783 100644
--- a/libs/minikin/HbFontCache.cpp
+++ b/libs/minikin/HbFontCache.cpp

@@ -18,33 +18,18 @@
 
 #include "HbFontCache.h"
 
-#include <cutils/log.h>
+#include <log/log.h>
+#include <utils/LruCache.h>
+
 #include <hb.h>
 #include <hb-ot.h>
-#include <utils/LruCache.h>
 
 #include <minikin/MinikinFont.h>
 #include "MinikinInternal.h"
 
-namespace android {
+namespace minikin {
 
-static hb_blob_t* referenceTable(hb_face_t* /* face */, hb_tag_t tag, void* userData) {
-    MinikinFont* font = reinterpret_cast<MinikinFont*>(userData);
-    MinikinDestroyFunc destroy = 0;
-    size_t size = 0;
-    const void* buffer = font->GetTable(tag, &size, &destroy);
-    if (buffer == nullptr) {
-        return nullptr;
-    }
-#ifdef VERBOSE_DEBUG
-    ALOGD("referenceTable %c%c%c%c length=%zd",
-        (tag >>24)&0xff, (tag>>16)&0xff, (tag>>8)&0xff, tag&0xff, size);
-#endif
-    return hb_blob_create(reinterpret_cast<const char*>(buffer), size,
-            HB_MEMORY_MODE_READONLY, const_cast<void*>(buffer), destroy);
-}
-
-class HbFontCache : private OnEntryRemoved<int32_t, hb_font_t*> {
+class HbFontCache : private android::OnEntryRemoved<int32_t, hb_font_t*> {
 public:
     HbFontCache() : mCache(kMaxEntries) {
         mCache.setOnEntryRemovedListener(this);
@@ -74,7 +59,7 @@
 private:
     static const size_t kMaxEntries = 100;
 
-    LruCache<int32_t, hb_font_t*> mCache;
+    android::LruCache<int32_t, hb_font_t*> mCache;
 };
 
 HbFontCache* getFontCacheLocked() {
@@ -99,7 +84,7 @@
 
 // Returns a new reference to a hb_font_t object, caller is
 // responsible for calling hb_font_destroy() on it.
-hb_font_t* getHbFontLocked(MinikinFont* minikinFont) {
+hb_font_t* getHbFontLocked(const MinikinFont* minikinFont) {
     assertMinikinLocked();
     // TODO: get rid of nullFaceFont
     static hb_font_t* nullFaceFont = nullptr;
@@ -119,15 +104,12 @@
 
     hb_face_t* face;
     const void* buf = minikinFont->GetFontData();
-    if (buf == nullptr) {
-        face = hb_face_create_for_tables(referenceTable, minikinFont, nullptr);
-    } else {
-        size_t size = minikinFont->GetFontSize();
-        hb_blob_t* blob = hb_blob_create(reinterpret_cast<const char*>(buf), size,
-            HB_MEMORY_MODE_READONLY, nullptr, nullptr);
-        face = hb_face_create(blob, minikinFont->GetFontIndex());
-        hb_blob_destroy(blob);
-    }
+    size_t size = minikinFont->GetFontSize();
+    hb_blob_t* blob = hb_blob_create(reinterpret_cast<const char*>(buf), size,
+        HB_MEMORY_MODE_READONLY, nullptr, nullptr);
+    face = hb_face_create(blob, minikinFont->GetFontIndex());
+    hb_blob_destroy(blob);
+
     hb_font_t* parent_font = hb_font_create(face);
     hb_ot_font_set_funcs(parent_font);
 
@@ -135,10 +117,15 @@
     hb_font_set_scale(parent_font, upem, upem);
 
     font = hb_font_create_sub_font(parent_font);
+    std::vector<hb_variation_t> variations;
+    for (const FontVariation& variation : minikinFont->GetAxes()) {
+        variations.push_back({variation.axisTag, variation.value});
+    }
+    hb_font_set_variations(font, variations.data(), variations.size());
     hb_font_destroy(parent_font);
     hb_face_destroy(face);
     fontCache->put(fontId, font);
     return hb_font_reference(font);
 }
 
-}  // namespace android
+}  // namespace minikin

diff --git a/libs/minikin/HbFontCache.h b/libs/minikin/HbFontCache.h
index 449b354..59969e2 100644
--- a/libs/minikin/HbFontCache.h
+++ b/libs/minikin/HbFontCache.h

@@ -19,12 +19,12 @@
 
 struct hb_font_t;
 
-namespace android {
+namespace minikin {
 class MinikinFont;
 
 void purgeHbFontCacheLocked();
 void purgeHbFontLocked(const MinikinFont* minikinFont);
-hb_font_t* getHbFontLocked(MinikinFont* minikinFont);
+hb_font_t* getHbFontLocked(const MinikinFont* minikinFont);
 
-}  // namespace android
+}  // namespace minikin
 #endif  // MINIKIN_HBFONT_CACHE_H

diff --git a/libs/minikin/Hyphenator.cpp b/libs/minikin/Hyphenator.cpp
index c5eb60b..0605b27 100644
--- a/libs/minikin/Hyphenator.cpp
+++ b/libs/minikin/Hyphenator.cpp

@@ -19,6 +19,7 @@
 #include <algorithm>
 #include <string>
 #include <unicode/uchar.h>
+#include <unicode/uscript.h>
 
 // HACK: for reading pattern file
 #include <fcntl.h>
@@ -30,9 +31,12 @@
 
 using std::vector;
 
-namespace android {
+namespace minikin {
 
+static const uint16_t CHAR_HYPHEN_MINUS = 0x002D;
 static const uint16_t CHAR_SOFT_HYPHEN = 0x00AD;
+static const uint16_t CHAR_MIDDLE_DOT = 0x00B7;
+static const uint16_t CHAR_HYPHEN = 0x2010;
 
 // The following are structs that correspond to tables inside the hyb file format
 
@@ -104,40 +108,224 @@
     }
 };
 
-Hyphenator* Hyphenator::loadBinary(const uint8_t* patternData) {
+Hyphenator* Hyphenator::loadBinary(const uint8_t* patternData, size_t minPrefix, size_t minSuffix) {
     Hyphenator* result = new Hyphenator;
     result->patternData = patternData;
+    result->minPrefix = minPrefix;
+    result->minSuffix = minSuffix;
     return result;
 }
 
-void Hyphenator::hyphenate(vector<uint8_t>* result, const uint16_t* word, size_t len) {
+void Hyphenator::hyphenate(vector<HyphenationType>* result, const uint16_t* word, size_t len,
+        const icu::Locale& locale) {
     result->clear();
     result->resize(len);
     const size_t paddedLen = len + 2;  // start and stop code each count for 1
     if (patternData != nullptr &&
-            (int)len >= MIN_PREFIX + MIN_SUFFIX && paddedLen <= MAX_HYPHENATED_SIZE) {
+            len >= minPrefix + minSuffix && paddedLen <= MAX_HYPHENATED_SIZE) {
         uint16_t alpha_codes[MAX_HYPHENATED_SIZE];
-        if (alphabetLookup(alpha_codes, word, len)) {
-            hyphenateFromCodes(result->data(), alpha_codes, paddedLen);
+        const HyphenationType hyphenValue = alphabetLookup(alpha_codes, word, len);
+        if (hyphenValue != HyphenationType::DONT_BREAK) {
+            hyphenateFromCodes(result->data(), alpha_codes, paddedLen, hyphenValue);
             return;
         }
         // TODO: try NFC normalization
         // TODO: handle non-BMP Unicode (requires remapping of offsets)
     }
-    hyphenateSoft(result->data(), word, len);
+    // Note that we will always get here if the word contains a hyphen or a soft hyphen, because the
+    // alphabet is not expected to contain a hyphen or a soft hyphen character, so alphabetLookup
+    // would return DONT_BREAK.
+    hyphenateWithNoPatterns(result->data(), word, len, locale);
 }
 
-// If any soft hyphen is present in the word, use soft hyphens to decide hyphenation,
-// as recommended in UAX #14 (Use of Soft Hyphen)
-void Hyphenator::hyphenateSoft(uint8_t* result, const uint16_t* word, size_t len) {
-    result[0] = 0;
+// This function determines whether a character is like U+2010 HYPHEN in
+// line breaking and usage: a character immediately after which line breaks
+// are allowed, but words containing it should not be automatically
+// hyphenated using patterns. This is a curated set, created by manually
+// inspecting all the characters that have the Unicode line breaking
+// property of BA or HY and seeing which ones are hyphens.
+bool Hyphenator::isLineBreakingHyphen(uint32_t c) {
+    return (c == 0x002D || // HYPHEN-MINUS
+            c == 0x058A || // ARMENIAN HYPHEN
+            c == 0x05BE || // HEBREW PUNCTUATION MAQAF
+            c == 0x1400 || // CANADIAN SYLLABICS HYPHEN
+            c == 0x2010 || // HYPHEN
+            c == 0x2013 || // EN DASH
+            c == 0x2027 || // HYPHENATION POINT
+            c == 0x2E17 || // DOUBLE OBLIQUE HYPHEN
+            c == 0x2E40);  // DOUBLE HYPHEN
+}
+
+const static uint32_t HYPHEN_STR[] = {0x2010, 0};
+const static uint32_t ARMENIAN_HYPHEN_STR[] = {0x058A, 0};
+const static uint32_t MAQAF_STR[] = {0x05BE, 0};
+const static uint32_t UCAS_HYPHEN_STR[] = {0x1400, 0};
+const static uint32_t ZWJ_STR[] = {0x200D, 0};
+const static uint32_t ZWJ_AND_HYPHEN_STR[] = {0x200D, 0x2010, 0};
+
+const uint32_t* HyphenEdit::getHyphenString(uint32_t hyph) {
+    switch (hyph) {
+        case INSERT_HYPHEN_AT_END:
+        case REPLACE_WITH_HYPHEN_AT_END:
+        case INSERT_HYPHEN_AT_START:
+            return HYPHEN_STR;
+        case INSERT_ARMENIAN_HYPHEN_AT_END:
+            return ARMENIAN_HYPHEN_STR;
+        case INSERT_MAQAF_AT_END:
+            return MAQAF_STR;
+        case INSERT_UCAS_HYPHEN_AT_END:
+            return UCAS_HYPHEN_STR;
+        case INSERT_ZWJ_AND_HYPHEN_AT_END:
+            return ZWJ_AND_HYPHEN_STR;
+        case INSERT_ZWJ_AT_START:
+            return ZWJ_STR;
+        default:
+            return nullptr;
+    }
+}
+
+uint32_t HyphenEdit::editForThisLine(HyphenationType type) {
+    switch (type) {
+        case HyphenationType::DONT_BREAK:
+            return NO_EDIT;
+        case HyphenationType::BREAK_AND_INSERT_HYPHEN:
+            return INSERT_HYPHEN_AT_END;
+        case HyphenationType::BREAK_AND_INSERT_ARMENIAN_HYPHEN:
+            return INSERT_ARMENIAN_HYPHEN_AT_END;
+        case HyphenationType::BREAK_AND_INSERT_MAQAF:
+            return INSERT_MAQAF_AT_END;
+        case HyphenationType::BREAK_AND_INSERT_UCAS_HYPHEN:
+            return INSERT_UCAS_HYPHEN_AT_END;
+        case HyphenationType::BREAK_AND_REPLACE_WITH_HYPHEN:
+            return REPLACE_WITH_HYPHEN_AT_END;
+        case HyphenationType::BREAK_AND_INSERT_HYPHEN_AND_ZWJ:
+            return INSERT_ZWJ_AND_HYPHEN_AT_END;
+        default:
+            return BREAK_AT_END;
+    }
+}
+
+uint32_t HyphenEdit::editForNextLine(HyphenationType type) {
+    switch (type) {
+        case HyphenationType::DONT_BREAK:
+            return NO_EDIT;
+        case HyphenationType::BREAK_AND_INSERT_HYPHEN_AT_NEXT_LINE:
+            return INSERT_HYPHEN_AT_START;
+        case HyphenationType::BREAK_AND_INSERT_HYPHEN_AND_ZWJ:
+            return INSERT_ZWJ_AT_START;
+        default:
+            return BREAK_AT_START;
+    }
+}
+
+static UScriptCode getScript(uint32_t codePoint) {
+    UErrorCode errorCode = U_ZERO_ERROR;
+    const UScriptCode script = uscript_getScript(static_cast<UChar32>(codePoint), &errorCode);
+    if (U_SUCCESS(errorCode)) {
+        return script;
+    } else {
+        return USCRIPT_INVALID_CODE;
+    }
+}
+
+static HyphenationType hyphenationTypeBasedOnScript(uint32_t codePoint) {
+    // Note: It's not clear what the best hyphen for Hebrew is. While maqaf is the "correct" hyphen
+    // for Hebrew, modern practice may have shifted towards Western hyphens. We use normal hyphens
+    // for now to be safe.  BREAK_AND_INSERT_MAQAF is already implemented, so if we want to switch
+    // to maqaf for Hebrew, we can simply add a condition here.
+    const UScriptCode script = getScript(codePoint);
+    if (script == USCRIPT_KANNADA
+            || script == USCRIPT_MALAYALAM
+            || script == USCRIPT_TAMIL
+            || script == USCRIPT_TELUGU) {
+        // Grantha is not included, since we don't support non-BMP hyphenation yet.
+        return HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN;
+    } else if (script == USCRIPT_ARMENIAN) {
+        return HyphenationType::BREAK_AND_INSERT_ARMENIAN_HYPHEN;
+    } else if (script == USCRIPT_CANADIAN_ABORIGINAL) {
+        return HyphenationType::BREAK_AND_INSERT_UCAS_HYPHEN;
+    } else {
+        return HyphenationType::BREAK_AND_INSERT_HYPHEN;
+    }
+}
+
+static inline int32_t getJoiningType(UChar32 codepoint) {
+    return u_getIntPropertyValue(codepoint, UCHAR_JOINING_TYPE);
+}
+
+// Assumption for caller: location must be >= 2 and word[location - 1] == CHAR_SOFT_HYPHEN.
+// This function decides if the letters before and after the hyphen should appear as joining.
+static inline HyphenationType getHyphTypeForArabic(const uint16_t* word, size_t len,
+        size_t location) {
+    ssize_t i = location;
+    int32_t type = U_JT_NON_JOINING;
+    while (static_cast<size_t>(i) < len && (type = getJoiningType(word[i])) == U_JT_TRANSPARENT) {
+        i++;
+    }
+    if (type == U_JT_DUAL_JOINING || type == U_JT_RIGHT_JOINING || type == U_JT_JOIN_CAUSING) {
+        // The next character is of the type that may join the last character. See if the last
+        // character is also of the right type.
+        i = location - 2; // Skip the soft hyphen
+        type = U_JT_NON_JOINING;
+        while (i >= 0 && (type = getJoiningType(word[i])) == U_JT_TRANSPARENT) {
+            i--;
+        }
+        if (type == U_JT_DUAL_JOINING || type == U_JT_LEFT_JOINING || type == U_JT_JOIN_CAUSING) {
+            return HyphenationType::BREAK_AND_INSERT_HYPHEN_AND_ZWJ;
+        }
+    }
+    return HyphenationType::BREAK_AND_INSERT_HYPHEN;
+}
+
+// Use various recommendations of UAX #14 Unicode Line Breaking Algorithm for hyphenating words
+// that didn't match patterns, especially words that contain hyphens or soft hyphens (See sections
+// 5.3, Use of Hyphen, and 5.4, Use of Soft Hyphen).
+void Hyphenator::hyphenateWithNoPatterns(HyphenationType* result, const uint16_t* word, size_t len,
+        const icu::Locale& locale) {
+    result[0] = HyphenationType::DONT_BREAK;
     for (size_t i = 1; i < len; i++) {
-        result[i] = word[i - 1] == CHAR_SOFT_HYPHEN;
+        const uint16_t prevChar = word[i - 1];
+        if (i > 1 && isLineBreakingHyphen(prevChar)) {
+            // Break after hyphens, but only if they don't start the word.
+
+            if ((prevChar == CHAR_HYPHEN_MINUS || prevChar == CHAR_HYPHEN)
+                    && strcmp(locale.getLanguage(), "pl") == 0
+                    && getScript(word[i]) == USCRIPT_LATIN ) {
+                // In Polish, hyphens get repeated at the next line. To be safe,
+                // we will do this only if the next character is Latin.
+                result[i] = HyphenationType::BREAK_AND_INSERT_HYPHEN_AT_NEXT_LINE;
+            } else {
+                result[i] = HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN;
+            }
+        } else if (i > 1 && prevChar == CHAR_SOFT_HYPHEN) {
+            // Break after soft hyphens, but only if they don't start the word (a soft hyphen
+            // starting the word doesn't give any useful break opportunities). The type of the break
+            // is based on the script of the character we break on.
+            if (getScript(word[i]) == USCRIPT_ARABIC) {
+                // For Arabic, we need to look and see if the characters around the soft hyphen
+                // actually join. If they don't, we'll just insert a normal hyphen.
+                result[i] = getHyphTypeForArabic(word, len, i);
+            } else {
+                result[i] = hyphenationTypeBasedOnScript(word[i]);
+            }
+        } else if (prevChar == CHAR_MIDDLE_DOT
+                && minPrefix < i && i <= len - minSuffix
+                && ((word[i - 2] == 'l' && word[i] == 'l')
+                        || (word[i - 2] == 'L' && word[i] == 'L'))
+                && strcmp(locale.getLanguage(), "ca") == 0) {
+            // In Catalan, "l·l" should break as "l-" on the first line
+            // and "l" on the next line.
+            result[i] = HyphenationType::BREAK_AND_REPLACE_WITH_HYPHEN;
+        } else {
+            result[i] = HyphenationType::DONT_BREAK;
+        }
      }
 }
 
-bool Hyphenator::alphabetLookup(uint16_t* alpha_codes, const uint16_t* word, size_t len) {
+HyphenationType Hyphenator::alphabetLookup(uint16_t* alpha_codes, const uint16_t* word,
+        size_t len) {
     const Header* header = getHeader();
+    HyphenationType result = HyphenationType::BREAK_AND_INSERT_HYPHEN;
     // TODO: check header magic
     uint32_t alphabetVersion = header->alphabetVersion();
     if (alphabetVersion == 0) {
@@ -148,16 +336,19 @@
         for (size_t i = 0; i < len; i++) {
             uint16_t c = word[i];
             if (c < min_codepoint || c >= max_codepoint) {
-                return false;
+                return HyphenationType::DONT_BREAK;
             }
             uint8_t code = alphabet->data[c - min_codepoint];
             if (code == 0) {
-                return false;
+                return HyphenationType::DONT_BREAK;
+            }
+            if (result == HyphenationType::BREAK_AND_INSERT_HYPHEN) {
+                result = hyphenationTypeBasedOnScript(c);
             }
             alpha_codes[i + 1] = code;
         }
         alpha_codes[len + 1] = 0;  // word termination
-        return true;
+        return result;
     } else if (alphabetVersion == 1) {
         const AlphabetTable1* alphabet = header->alphabetTable1();
         size_t n_entries = alphabet->n_entries;
@@ -168,18 +359,21 @@
             uint16_t c = word[i];
             auto p = std::lower_bound(begin, end, c << 11);
             if (p == end) {
-                return false;
+                return HyphenationType::DONT_BREAK;
             }
             uint32_t entry = *p;
             if (AlphabetTable1::codepoint(entry) != c) {
-                return false;
+                return HyphenationType::DONT_BREAK;
+            }
+            if (result == HyphenationType::BREAK_AND_INSERT_HYPHEN) {
+                result = hyphenationTypeBasedOnScript(c);
             }
             alpha_codes[i + 1] = AlphabetTable1::value(entry);
         }
         alpha_codes[len + 1] = 0;
-        return true;
+        return result;
     }
-    return false;
+    return HyphenationType::DONT_BREAK;
 }
 
 /**
@@ -187,7 +381,12 @@
  * has been done by now, and all characters have been found in the alphabet.
  * Note: len here is the padded length including 0 codes at start and end.
  **/
-void Hyphenator::hyphenateFromCodes(uint8_t* result, const uint16_t* codes, size_t len) {
+void Hyphenator::hyphenateFromCodes(HyphenationType* result, const uint16_t* codes, size_t len,
+        HyphenationType hyphenValue) {
+    static_assert(sizeof(HyphenationType) == sizeof(uint8_t), "HyphnationType must be uint8_t.");
+    // Reuse the result array as a buffer for calculating intermediate hyphenation numbers.
+    uint8_t* buffer = reinterpret_cast<uint8_t*>(result);
+
     const Header* header = getHeader();
     const Trie* trie = header->trieTable();
     const Pattern* pattern = header->patternTable();
@@ -195,7 +394,7 @@
     uint32_t link_shift = trie->link_shift;
     uint32_t link_mask = trie->link_mask;
     uint32_t pattern_shift = trie->pattern_shift;
-    size_t maxOffset = len - MIN_SUFFIX - 1;
+    size_t maxOffset = len - minSuffix - 1;
     for (size_t i = 0; i < len - 1; i++) {
         uint32_t node = 0;  // index into Trie table
         for (size_t j = i; j < len; j++) {
@@ -209,27 +408,28 @@
             uint32_t pat_ix = trie->data[node] >> pattern_shift;
             // pat_ix contains a 3-tuple of length, shift (number of trailing zeros), and an offset
             // into the buf pool. This is the pattern for the substring (i..j) we just matched,
-            // which we combine (via point-wise max) into the result vector.
+            // which we combine (via point-wise max) into the buffer vector.
             if (pat_ix != 0) {
                 uint32_t pat_entry = pattern->data[pat_ix];
                 int pat_len = Pattern::len(pat_entry);
                 int pat_shift = Pattern::shift(pat_entry);
                 const uint8_t* pat_buf = pattern->buf(pat_entry);
                 int offset = j + 1 - (pat_len + pat_shift);
-                // offset is the index within result that lines up with the start of pat_buf
-                int start = std::max(MIN_PREFIX - offset, 0);
+                // offset is the index within buffer that lines up with the start of pat_buf
+                int start = std::max((int)minPrefix - offset, 0);
                 int end = std::min(pat_len, (int)maxOffset - offset);
                 for (int k = start; k < end; k++) {
-                    result[offset + k] = std::max(result[offset + k], pat_buf[k]);
+                    buffer[offset + k] = std::max(buffer[offset + k], pat_buf[k]);
                 }
             }
         }
     }
     // Since the above calculation does not modify values outside
-    // [MIN_PREFIX, len - MIN_SUFFIX], they are left as 0.
-    for (size_t i = MIN_PREFIX; i < maxOffset; i++) {
-        result[i] &= 1;
+    // [minPrefix, len - minSuffix], they are left as 0 = DONT_BREAK.
+    for (size_t i = minPrefix; i < maxOffset; i++) {
+        // Hyphenation opportunities happen when the hyphenation numbers are odd.
+        result[i] = (buffer[i] & 1u) ? hyphenValue : HyphenationType::DONT_BREAK;
     }
 }
 
-}  // namespace android
+}  // namespace minikin

diff --git a/libs/minikin/Layout.cpp b/libs/minikin/Layout.cpp
index 9c1d6a8..568e038 100644
--- a/libs/minikin/Layout.cpp
+++ b/libs/minikin/Layout.cpp

@@ -15,31 +15,31 @@
  */
 
 #define LOG_TAG "Minikin"
-#include <cutils/log.h>
-
-#include <math.h>
 
 #include <algorithm>
 #include <fstream>
 #include <iostream>  // for debugging
+#include <math.h>
 #include <string>
+#include <unicode/ubidi.h>
+#include <unicode/utf16.h>
 #include <vector>
 
+#include <log/log.h>
 #include <utils/JenkinsHash.h>
 #include <utils/LruCache.h>
 #include <utils/Singleton.h>
 #include <utils/String16.h>
 
-#include <unicode/ubidi.h>
 #include <hb-icu.h>
 #include <hb-ot.h>
 
 #include "FontLanguage.h"
 #include "FontLanguageListCache.h"
-#include "LayoutUtils.h"
 #include "HbFontCache.h"
+#include "LayoutUtils.h"
 #include "MinikinInternal.h"
-#include <minikin/MinikinFontFreeType.h>
+#include <minikin/Emoji.h>
 #include <minikin/Layout.h>
 
 using std::string;
@@ -47,48 +47,6 @@
 
 namespace minikin {
 
-Bitmap::Bitmap(int width, int height) : width(width), height(height) {
-    buf = new uint8_t[width * height]();
-}
-
-Bitmap::~Bitmap() {
-    delete[] buf;
-}
-
-void Bitmap::writePnm(std::ofstream &o) const {
-    o << "P5" << std::endl;
-    o << width << " " << height << std::endl;
-    o << "255" << std::endl;
-    o.write((const char *)buf, width * height);
-    o.close();
-}
-
-void Bitmap::drawGlyph(const android::GlyphBitmap& bitmap, int x, int y) {
-    int bmw = bitmap.width;
-    int bmh = bitmap.height;
-    x += bitmap.left;
-    y -= bitmap.top;
-    int x0 = std::max(0, x);
-    int x1 = std::min(width, x + bmw);
-    int y0 = std::max(0, y);
-    int y1 = std::min(height, y + bmh);
-    const unsigned char* src = bitmap.buffer + (y0 - y) * bmw + (x0 - x);
-    uint8_t* dst = buf + y0 * width;
-    for (int yy = y0; yy < y1; yy++) {
-        for (int xx = x0; xx < x1; xx++) {
-            int pixel = (int)dst[xx] + (int)src[xx - x];
-            pixel = pixel > 0xff ? 0xff : pixel;
-            dst[xx] = pixel;
-        }
-        src += bmw;
-        dst += width;
-    }
-}
-
-} // namespace minikin
-
-namespace android {
-
 const int kDirection_Mask = 0x1;
 
 struct LayoutContext {
@@ -109,8 +67,9 @@
 
 class LayoutCacheKey {
 public:
-    LayoutCacheKey(const FontCollection* collection, const MinikinPaint& paint, FontStyle style,
-            const uint16_t* chars, size_t start, size_t count, size_t nchars, bool dir)
+    LayoutCacheKey(const std::shared_ptr<FontCollection>& collection, const MinikinPaint& paint,
+            FontStyle style, const uint16_t* chars, size_t start, size_t count, size_t nchars,
+            bool dir)
             : mChars(chars), mNchars(nchars),
             mStart(start), mCount(count), mId(collection->getId()), mStyle(style),
             mSize(paint.size), mScaleX(paint.scaleX), mSkewX(paint.skewX),
@@ -120,7 +79,7 @@
     }
     bool operator==(const LayoutCacheKey &other) const;
 
-    hash_t hash() const {
+    android::hash_t hash() const {
         return mHash;
     }
 
@@ -134,11 +93,11 @@
         mChars = NULL;
     }
 
-    void doLayout(Layout* layout, LayoutContext* ctx, const FontCollection* collection) const {
-        layout->setFontCollection(collection);
+    void doLayout(Layout* layout, LayoutContext* ctx,
+            const std::shared_ptr<FontCollection>& collection) const {
         layout->mAdvances.resize(mCount, 0);
         ctx->clearHbFonts();
-        layout->doLayoutRun(mChars, mStart, mCount, mNchars, mIsRtl, ctx);
+        layout->doLayoutRun(mChars, mStart, mCount, mNchars, mIsRtl, ctx, collection);
     }
 
 private:
@@ -157,12 +116,12 @@
     bool mIsRtl;
     // Note: any fields added to MinikinPaint must also be reflected here.
     // TODO: language matching (possibly integrate into style)
-    hash_t mHash;
+    android::hash_t mHash;
 
-    hash_t computeHash() const;
+    android::hash_t computeHash() const;
 };
 
-class LayoutCache : private OnEntryRemoved<LayoutCacheKey, Layout*> {
+class LayoutCache : private android::OnEntryRemoved<LayoutCacheKey, Layout*> {
 public:
     LayoutCache() : mCache(kMaxEntries) {
         mCache.setOnEntryRemovedListener(this);
@@ -172,7 +131,8 @@
         mCache.clear();
     }
 
-    Layout* get(LayoutCacheKey& key, LayoutContext* ctx, const FontCollection* collection) {
+    Layout* get(LayoutCacheKey& key, LayoutContext* ctx,
+            const std::shared_ptr<FontCollection>& collection) {
         Layout* layout = mCache.get(key);
         if (layout == NULL) {
             key.copyText();
@@ -190,7 +150,7 @@
         delete value;
     }
 
-    LruCache<LayoutCacheKey, Layout*> mCache;
+    android::LruCache<LayoutCacheKey, Layout*> mCache;
 
     //static const size_t kMaxEntries = LruCache<LayoutCacheKey, Layout*>::kUnlimitedCapacity;
 
@@ -204,7 +164,7 @@
     return 0;
 }
 
-class LayoutEngine : public Singleton<LayoutEngine> {
+class LayoutEngine : public ::android::Singleton<LayoutEngine> {
 public:
     LayoutEngine() {
         unicodeFunctions = hb_unicode_funcs_create(hb_icu_get_unicode_funcs());
@@ -220,8 +180,6 @@
     LayoutCache layoutCache;
 };
 
-ANDROID_SINGLETON_STATIC_INSTANCE(LayoutEngine);
-
 bool LayoutCacheKey::operator==(const LayoutCacheKey& other) const {
     return mId == other.mId
             && mStart == other.mStart
@@ -238,23 +196,23 @@
             && !memcmp(mChars, other.mChars, mNchars * sizeof(uint16_t));
 }
 
-hash_t LayoutCacheKey::computeHash() const {
-    uint32_t hash = JenkinsHashMix(0, mId);
-    hash = JenkinsHashMix(hash, mStart);
-    hash = JenkinsHashMix(hash, mCount);
-    hash = JenkinsHashMix(hash, hash_type(mStyle));
-    hash = JenkinsHashMix(hash, hash_type(mSize));
-    hash = JenkinsHashMix(hash, hash_type(mScaleX));
-    hash = JenkinsHashMix(hash, hash_type(mSkewX));
-    hash = JenkinsHashMix(hash, hash_type(mLetterSpacing));
-    hash = JenkinsHashMix(hash, hash_type(mPaintFlags));
-    hash = JenkinsHashMix(hash, hash_type(mHyphenEdit.hasHyphen()));
-    hash = JenkinsHashMix(hash, hash_type(mIsRtl));
-    hash = JenkinsHashMixShorts(hash, mChars, mNchars);
-    return JenkinsHashWhiten(hash);
+android::hash_t LayoutCacheKey::computeHash() const {
+    uint32_t hash = android::JenkinsHashMix(0, mId);
+    hash = android::JenkinsHashMix(hash, mStart);
+    hash = android::JenkinsHashMix(hash, mCount);
+    hash = android::JenkinsHashMix(hash, hash_type(mStyle));
+    hash = android::JenkinsHashMix(hash, hash_type(mSize));
+    hash = android::JenkinsHashMix(hash, hash_type(mScaleX));
+    hash = android::JenkinsHashMix(hash, hash_type(mSkewX));
+    hash = android::JenkinsHashMix(hash, hash_type(mLetterSpacing));
+    hash = android::JenkinsHashMix(hash, hash_type(mPaintFlags));
+    hash = android::JenkinsHashMix(hash, hash_type(mHyphenEdit.getHyphen()));
+    hash = android::JenkinsHashMix(hash, hash_type(mIsRtl));
+    hash = android::JenkinsHashMixShorts(hash, mChars, mNchars);
+    return android::JenkinsHashWhiten(hash);
 }
 
-hash_t hash_type(const LayoutCacheKey& key) {
+android::hash_t hash_type(const LayoutCacheKey& key) {
     return key.hash();
 }
 
@@ -269,10 +227,6 @@
     }
 }
 
-// Deprecated. Remove when callers are removed.
-void Layout::init() {
-}
-
 void Layout::reset() {
     mGlyphs.clear();
     mFaces.clear();
@@ -281,15 +235,10 @@
     mAdvance = 0;
 }
 
-void Layout::setFontCollection(const FontCollection* collection) {
-    mCollection = collection;
-}
-
 static hb_position_t harfbuzzGetGlyphHorizontalAdvance(hb_font_t* /* hbFont */, void* fontData,
         hb_codepoint_t glyph, void* /* userData */) {
     MinikinPaint* paint = reinterpret_cast<MinikinPaint*>(fontData);
-    MinikinFont* font = paint->font;
-    float advance = font->GetHorizontalAdvance(glyph, *paint);
+    float advance = paint->font->GetHorizontalAdvance(glyph, *paint);
     return 256 * advance + 0.5;
 }
 
@@ -301,16 +250,36 @@
     return true;
 }
 
-hb_font_funcs_t* getHbFontFuncs() {
-    static hb_font_funcs_t* hbFontFuncs = 0;
+hb_font_funcs_t* getHbFontFuncs(bool forColorBitmapFont) {
+    assertMinikinLocked();
 
-    if (hbFontFuncs == 0) {
-        hbFontFuncs = hb_font_funcs_create();
-        hb_font_funcs_set_glyph_h_advance_func(hbFontFuncs, harfbuzzGetGlyphHorizontalAdvance, 0, 0);
-        hb_font_funcs_set_glyph_h_origin_func(hbFontFuncs, harfbuzzGetGlyphHorizontalOrigin, 0, 0);
-        hb_font_funcs_make_immutable(hbFontFuncs);
+    static hb_font_funcs_t* hbFuncs = nullptr;
+    static hb_font_funcs_t* hbFuncsForColorBitmap = nullptr;
+
+    hb_font_funcs_t** funcs = forColorBitmapFont ? &hbFuncs : &hbFuncsForColorBitmap;
+    if (*funcs == nullptr) {
+        *funcs = hb_font_funcs_create();
+        if (forColorBitmapFont) {
+            // Don't override the h_advance function since we use HarfBuzz's implementation for
+            // emoji for performance reasons.
+            // Note that it is technically possible for a TrueType font to have outline and embedded
+            // bitmap at the same time. We ignore modified advances of hinted outline glyphs in that
+            // case.
+        } else {
+            // Override the h_advance function since we can't use HarfBuzz's implementation. It may
+            // return the wrong value if the font uses hinting aggressively.
+            hb_font_funcs_set_glyph_h_advance_func(*funcs, harfbuzzGetGlyphHorizontalAdvance, 0, 0);
+        }
+        hb_font_funcs_set_glyph_h_origin_func(*funcs, harfbuzzGetGlyphHorizontalOrigin, 0, 0);
+        hb_font_funcs_make_immutable(*funcs);
     }
-    return hbFontFuncs;
+    return *funcs;
+}
+
+static bool isColorBitmapFont(hb_font_t* font) {
+    hb_face_t* face = hb_font_get_face(font);
+    HbBlob cbdt(hb_face_reference_table(face, HB_TAG('C', 'B', 'D', 'T')));
+    return cbdt.size() > 0;
 }
 
 static float HBFixedToFloat(hb_position_t v)
@@ -330,7 +299,7 @@
     }
 }
 
-int Layout::findFace(FakedFont face, LayoutContext* ctx) {
+int Layout::findFace(const FakedFont& face, LayoutContext* ctx) {
     unsigned int ix;
     for (ix = 0; ix < mFaces.size(); ix++) {
         if (mFaces[ix].font == face.font) {
@@ -342,7 +311,7 @@
     // corresponding hb_font object.
     if (ctx != NULL) {
         hb_font_t* font = getHbFontLocked(face.font);
-        hb_font_set_funcs(font, getHbFontFuncs(), &ctx->paint, 0);
+        hb_font_set_funcs(font, getHbFontFuncs(isColorBitmapFont(font)), &ctx->paint, 0);
         ctx->hbFonts.push_back(font);
     }
     return ix;
@@ -496,7 +465,8 @@
     size_t mRunCount;
     bool mIsRtl;
 
-    DISALLOW_COPY_AND_ASSIGN(BidiText);
+    BidiText(const BidiText&) = delete;
+    void operator=(const BidiText&) = delete;
 };
 
 BidiText::Iter::Iter(UBiDi* bidi, size_t start, size_t end, size_t runIndex, size_t runCount,
@@ -553,6 +523,13 @@
         return;
     }
     UErrorCode status = U_ZERO_ERROR;
+    // Set callbacks to override bidi classes of new emoji
+    ubidi_setClassCallback(mBidi, emojiBidiOverride, nullptr, nullptr, nullptr, &status);
+    if (!U_SUCCESS(status)) {
+        ALOGE("error setting bidi callback function, status = %d", status);
+        return;
+    }
+
     UBiDiLevel bidiReq = bidiFlags;
     if (bidiFlags == kBidi_Default_LTR) {
         bidiReq = UBIDI_DEFAULT_LTR;
@@ -577,8 +554,9 @@
 }
 
 void Layout::doLayout(const uint16_t* buf, size_t start, size_t count, size_t bufSize,
-        int bidiFlags, const FontStyle &style, const MinikinPaint &paint) {
-    AutoMutex _l(gMinikinLock);
+        int bidiFlags, const FontStyle &style, const MinikinPaint &paint,
+        const std::shared_ptr<FontCollection>& collection) {
+    android::AutoMutex _l(gMinikinLock);
 
     LayoutContext ctx;
     ctx.style = style;
@@ -589,15 +567,15 @@
 
     for (const BidiText::Iter::RunInfo& runInfo : BidiText(buf, start, count, bufSize, bidiFlags)) {
         doLayoutRunCached(buf, runInfo.mRunStart, runInfo.mRunLength, bufSize, runInfo.mIsRtl, &ctx,
-                start, mCollection, this, NULL);
+                start, collection, this, NULL);
     }
     ctx.clearHbFonts();
 }
 
 float Layout::measureText(const uint16_t* buf, size_t start, size_t count, size_t bufSize,
         int bidiFlags, const FontStyle &style, const MinikinPaint &paint,
-        const FontCollection* collection, float* advances) {
-    AutoMutex _l(gMinikinLock);
+        const std::shared_ptr<FontCollection>& collection, float* advances) {
+    android::AutoMutex _l(gMinikinLock);
 
     LayoutContext ctx;
     ctx.style = style;
@@ -615,9 +593,9 @@
 }
 
 float Layout::doLayoutRunCached(const uint16_t* buf, size_t start, size_t count, size_t bufSize,
-        bool isRtl, LayoutContext* ctx, size_t dstStart, const FontCollection* collection,
-        Layout* layout, float* advances) {
-    HyphenEdit hyphen = ctx->paint.hyphenEdit;
+        bool isRtl, LayoutContext* ctx, size_t dstStart,
+        const std::shared_ptr<FontCollection>& collection, Layout* layout, float* advances) {
+    const uint32_t originalHyphen = ctx->paint.hyphenEdit.getHyphen();
     float advance = 0;
     if (!isRtl) {
         // left to right
@@ -626,8 +604,15 @@
         size_t wordend;
         for (size_t iter = start; iter < start + count; iter = wordend) {
             wordend = getNextWordBreakForCache(buf, iter, bufSize);
-            // Only apply hyphen to the last word in the string.
-            ctx->paint.hyphenEdit = wordend >= start + count ? hyphen : HyphenEdit();
+            // Only apply hyphen to the first or last word in the string.
+            uint32_t hyphen = originalHyphen;
+            if (iter != start) { // Not the first word
+                hyphen &= ~HyphenEdit::MASK_START_OF_LINE;
+            }
+            if (wordend < start + count) { // Not the last word
+                hyphen &= ~HyphenEdit::MASK_END_OF_LINE;
+            }
+            ctx->paint.hyphenEdit = hyphen;
             size_t wordcount = std::min(start + count, wordend) - iter;
             advance += doLayoutWord(buf + wordstart, iter - wordstart, wordcount,
                     wordend - wordstart, isRtl, ctx, iter - dstStart, collection, layout,
@@ -641,8 +626,15 @@
         size_t wordend = end == 0 ? 0 : getNextWordBreakForCache(buf, end - 1, bufSize);
         for (size_t iter = end; iter > start; iter = wordstart) {
             wordstart = getPrevWordBreakForCache(buf, iter, bufSize);
-            // Only apply hyphen to the last (leftmost) word in the string.
-            ctx->paint.hyphenEdit = iter == end ? hyphen : HyphenEdit();
+            // Only apply hyphen to the first (rightmost) or last (leftmost) word in the string.
+            uint32_t hyphen = originalHyphen;
+            if (wordstart > start) { // Not the first word
+                hyphen &= ~HyphenEdit::MASK_START_OF_LINE;
+            }
+            if (iter != end) { // Not the last word
+                hyphen &= ~HyphenEdit::MASK_END_OF_LINE;
+            }
+            ctx->paint.hyphenEdit = hyphen;
             size_t bufStart = std::max(start, wordstart);
             advance += doLayoutWord(buf + wordstart, bufStart - wordstart, iter - bufStart,
                     wordend - wordstart, isRtl, ctx, bufStart - dstStart, collection, layout,
@@ -654,31 +646,42 @@
 }
 
 float Layout::doLayoutWord(const uint16_t* buf, size_t start, size_t count, size_t bufSize,
-        bool isRtl, LayoutContext* ctx, size_t bufStart, const FontCollection* collection,
-        Layout* layout, float* advances) {
+        bool isRtl, LayoutContext* ctx, size_t bufStart,
+        const std::shared_ptr<FontCollection>& collection, Layout* layout, float* advances) {
     LayoutCache& cache = LayoutEngine::getInstance().layoutCache;
     LayoutCacheKey key(collection, ctx->paint, ctx->style, buf, start, count, bufSize, isRtl);
-    bool skipCache = ctx->paint.skipCache();
-    if (skipCache) {
+
+    float wordSpacing = count == 1 && isWordSpace(buf[start]) ? ctx->paint.wordSpacing : 0;
+
+    float advance;
+    if (ctx->paint.skipCache()) {
         Layout layoutForWord;
         key.doLayout(&layoutForWord, ctx, collection);
         if (layout) {
-            layout->appendLayout(&layoutForWord, bufStart);
+            layout->appendLayout(&layoutForWord, bufStart, wordSpacing);
         }
         if (advances) {
             layoutForWord.getAdvances(advances);
         }
-        return layoutForWord.getAdvance();
+        advance = layoutForWord.getAdvance();
     } else {
         Layout* layoutForWord = cache.get(key, ctx, collection);
         if (layout) {
-            layout->appendLayout(layoutForWord, bufStart);
+            layout->appendLayout(layoutForWord, bufStart, wordSpacing);
         }
         if (advances) {
             layoutForWord->getAdvances(advances);
         }
-        return layoutForWord->getAdvance();
+        advance = layoutForWord->getAdvance();
     }
+
+    if (wordSpacing != 0) {
+        advance += wordSpacing;
+        if (advances) {
+            advances[0] += wordSpacing;
+        }
+    }
+    return advance;
 }
 
 static void addFeatures(const string &str, vector<hb_feature_t>* features) {
@@ -702,14 +705,149 @@
     }
 }
 
+static const hb_codepoint_t CHAR_HYPHEN = 0x2010; /* HYPHEN */
+
+static inline hb_codepoint_t determineHyphenChar(hb_codepoint_t preferredHyphen, hb_font_t* font) {
+    hb_codepoint_t glyph;
+    if (preferredHyphen == 0x058A /* ARMENIAN_HYPHEN */
+                || preferredHyphen == 0x05BE /* HEBREW PUNCTUATION MAQAF */
+                || preferredHyphen == 0x1400 /* CANADIAN SYLLABIC HYPHEN */) {
+        if (hb_font_get_nominal_glyph(font, preferredHyphen, &glyph)) {
+            return preferredHyphen;
+        } else {
+            // The original hyphen requested was not supported. Let's try and see if the
+            // Unicode hyphen is supported.
+            preferredHyphen = CHAR_HYPHEN;
+        }
+    }
+    if (preferredHyphen == CHAR_HYPHEN) { /* HYPHEN */
+        // Fall back to ASCII HYPHEN-MINUS if the font didn't have a glyph for the preferred hyphen.
+        // Note that we intentionally don't do anything special if the font doesn't have a
+        // HYPHEN-MINUS either, so a tofu could be shown, hinting towards something missing.
+        if (!hb_font_get_nominal_glyph(font, preferredHyphen, &glyph)) {
+            return 0x002D; // HYPHEN-MINUS
+        }
+    }
+    return preferredHyphen;
+}
+
+static inline void addHyphenToHbBuffer(hb_buffer_t* buffer, hb_font_t* font, uint32_t hyphen,
+        uint32_t cluster) {
+    const uint32_t* hyphenStr = HyphenEdit::getHyphenString(hyphen);
+    while (*hyphenStr != 0) {
+        hb_codepoint_t hyphenChar = determineHyphenChar(*hyphenStr, font);
+        hb_buffer_add(buffer, hyphenChar, cluster);
+        hyphenStr++;
+    }
+}
+
+// Returns the cluster value assigned to the first codepoint added to the buffer, which can be used
+// to translate cluster values returned by HarfBuzz to input indices.
+static inline uint32_t addToHbBuffer(hb_buffer_t* buffer,
+        const uint16_t* buf, size_t start, size_t count, size_t bufSize,
+        ssize_t scriptRunStart, ssize_t scriptRunEnd,
+        HyphenEdit hyphenEdit, hb_font_t* hbFont) {
+
+    // Only hyphenate the very first script run for starting hyphens.
+    const uint32_t startHyphen = (scriptRunStart == 0)
+            ? hyphenEdit.getStart()
+            : HyphenEdit::NO_EDIT;
+    // Only hyphenate the very last script run for ending hyphens.
+    const uint32_t endHyphen = (static_cast<size_t>(scriptRunEnd) == count)
+            ? hyphenEdit.getEnd()
+            : HyphenEdit::NO_EDIT;
+
+    // In the following code, we drop the pre-context and/or post-context if there is a
+    // hyphen edit at that end. This is not absolutely necessary, since HarfBuzz uses
+    // contexts only for joining scripts at the moment, e.g. to determine if the first or
+    // last letter of a text range to shape should take a joining form based on an
+    // adjacent letter or joiner (that comes from the context).
+    //
+    // TODO: Revisit this for:
+    // 1. Desperate breaks for joining scripts like Arabic (where it may be better to keep
+    //    the context);
+    // 2. Special features like start-of-word font features (not implemented in HarfBuzz
+    //    yet).
+
+    // We don't have any start-of-line replacement edit yet, so we don't need to check for
+    // those.
+    if (HyphenEdit::isInsertion(startHyphen)) {
+        // A cluster value of zero guarantees that the inserted hyphen will be in the same
+        // cluster with the next codepoint, since there is no pre-context.
+        addHyphenToHbBuffer(buffer, hbFont, startHyphen, 0 /* cluster value */);
+    }
+
+    const uint16_t* hbText;
+    int hbTextLength;
+    unsigned int hbItemOffset;
+    unsigned int hbItemLength = scriptRunEnd - scriptRunStart; // This is >= 1.
+
+    const bool hasEndInsertion = HyphenEdit::isInsertion(endHyphen);
+    const bool hasEndReplacement = HyphenEdit::isReplacement(endHyphen);
+    if (hasEndReplacement) {
+        // Skip the last code unit while copying the buffer for HarfBuzz if it's a replacement. We
+        // don't need to worry about non-BMP characters yet since replacements are only done for
+        // code units at the moment.
+        hbItemLength -= 1;
+    }
+
+    if (startHyphen == HyphenEdit::NO_EDIT) {
+        // No edit at the beginning. Use the whole pre-context.
+        hbText = buf;
+        hbItemOffset = start + scriptRunStart;
+    } else {
+        // There's an edit at the beginning. Drop the pre-context and start the buffer at where we
+        // want to start shaping.
+        hbText = buf + start + scriptRunStart;
+        hbItemOffset = 0;
+    }
+
+    if (endHyphen == HyphenEdit::NO_EDIT) {
+        // No edit at the end, use the whole post-context.
+        hbTextLength = (buf + bufSize) - hbText;
+    } else {
+        // There is an edit at the end. Drop the post-context.
+        hbTextLength = hbItemOffset + hbItemLength;
+    }
+
+    hb_buffer_add_utf16(buffer, hbText, hbTextLength, hbItemOffset, hbItemLength);
+
+    unsigned int numCodepoints;
+    hb_glyph_info_t* cpInfo = hb_buffer_get_glyph_infos(buffer, &numCodepoints);
+
+    // Add the hyphen at the end, if there's any.
+    if (hasEndInsertion || hasEndReplacement) {
+        // When a hyphen is inserted, by assigning the added hyphen and the last
+        // codepoint added to the HarfBuzz buffer to the same cluster, we can make sure
+        // that they always remain in the same cluster, even if the last codepoint gets
+        // merged into another cluster (for example when it's a combining mark).
+        //
+        // When a replacement happens instead, we want it to get the cluster value of
+        // the character it's replacing, which is one "codepoint length" larger than
+        // the last cluster. But since the character replaced is always just one
+        // code unit, we can just add 1.
+        uint32_t hyphenCluster;
+        if (numCodepoints == 0) {
+            // Nothing was added to the HarfBuzz buffer. This can only happen if
+            // we have a replacement that is replacing a one-code unit script run.
+            hyphenCluster = 0;
+        } else {
+            hyphenCluster = cpInfo[numCodepoints - 1].cluster + (uint32_t) hasEndReplacement;
+        }
+        addHyphenToHbBuffer(buffer, hbFont, endHyphen, hyphenCluster);
+        // Since we have just added to the buffer, cpInfo no longer necessarily points to
+        // the right place. Refresh it.
+        cpInfo = hb_buffer_get_glyph_infos(buffer, nullptr /* we don't need the size */);
+    }
+    return cpInfo[0].cluster;
+}
+
+
 void Layout::doLayoutRun(const uint16_t* buf, size_t start, size_t count, size_t bufSize,
-        bool isRtl, LayoutContext* ctx) {
+        bool isRtl, LayoutContext* ctx, const std::shared_ptr<FontCollection>& collection) {
     hb_buffer_t* buffer = LayoutEngine::getInstance().hbBuffer;
     vector<FontCollection::Run> items;
-    mCollection->itemize(buf + start, count, ctx->style, &items);
-    if (isRtl) {
-        std::reverse(items.begin(), items.end());
-    }
+    collection->itemize(buf + start, count, ctx->style, &items);
 
     vector<hb_feature_t> features;
     // Disable default-on non-required ligature features if letter-spacing
@@ -731,7 +869,9 @@
 
     float x = mAdvance;
     float y = 0;
-    for (size_t run_ix = 0; run_ix < items.size(); run_ix++) {
+    for (int run_ix = isRtl ? items.size() - 1 : 0;
+            isRtl ? run_ix >= 0 : run_ix < static_cast<int>(items.size());
+            isRtl ? --run_ix : ++run_ix) {
         FontCollection::Run &run = items[run_ix];
         if (run.fakedFont.font == NULL) {
             ALOGE("no font for run starting u+%04x length %d", buf[run.start], run.end - run.start);
@@ -748,13 +888,26 @@
         hb_font_set_ppem(hbFont, size * scaleX, size);
         hb_font_set_scale(hbFont, HBFloatToFixed(size * scaleX), HBFloatToFixed(size));
 
+        const bool is_color_bitmap_font = isColorBitmapFont(hbFont);
+
         // TODO: if there are multiple scripts within a font in an RTL run,
         // we need to reorder those runs. This is unlikely with our current
         // font stack, but should be done for correctness.
-        ssize_t srunend;
-        for (ssize_t srunstart = run.start; srunstart < run.end; srunstart = srunend) {
-            srunend = srunstart;
-            hb_script_t script = getScriptRun(buf + start, run.end, &srunend);
+
+        // Note: scriptRunStart and scriptRunEnd, as well as run.start and run.end, run between 0
+        // and count.
+        ssize_t scriptRunEnd;
+        for (ssize_t scriptRunStart = run.start;
+                scriptRunStart < run.end;
+                scriptRunStart = scriptRunEnd) {
+            scriptRunEnd = scriptRunStart;
+            hb_script_t script = getScriptRun(buf + start, run.end, &scriptRunEnd /* iterator */);
+            // After the call above, scriptRunEnd is guaranteed to have increased, since the only
+            // time getScriptRun does not increase its iterator is when it has already reached the
+            // end of the buffer. But that can't happen, since if we have already reached the end
+            // of the buffer, we should have had (scriptRunEnd == run.end), which means
+            // (scriptRunStart == run.end) which is impossible due to the exit condition of the for
+            // loop. So we can be sure that scriptRunEnd > scriptRunStart.
 
             double letterSpace = 0.0;
             double letterSpaceHalfLeft = 0.0;
@@ -784,33 +937,31 @@
                         break;
                     }
                 }
-                hb_buffer_set_language(buffer,
-                        hb_language_from_string(hbLanguage->getString().c_str(), -1));
+                hb_buffer_set_language(buffer, hbLanguage->getHbLanguage());
             }
-            hb_buffer_add_utf16(buffer, buf, bufSize, srunstart + start, srunend - srunstart);
-            if (ctx->paint.hyphenEdit.hasHyphen() && srunend > srunstart) {
-                // TODO: check whether this is really the desired semantics. It could have the
-                // effect of assigning the hyphen width to a nonspacing mark
-                unsigned int lastCluster = start + srunend - 1;
 
-                hb_codepoint_t hyphenChar = 0x2010; // HYPHEN
-                hb_codepoint_t glyph;
-                // Fallback to ASCII HYPHEN-MINUS if the font didn't have a glyph for HYPHEN. Note
-                // that we intentionally don't do anything special if the font doesn't have a
-                // HYPHEN-MINUS either, so a tofu could be shown, hinting towards something
-                // missing.
-                if (!hb_font_get_glyph(hbFont, hyphenChar, 0, &glyph)) {
-                    hyphenChar = 0x002D; // HYPHEN-MINUS
-                }
-                hb_buffer_add(buffer, hyphenChar, lastCluster);
-            }
+            const uint32_t clusterStart = addToHbBuffer(
+                buffer,
+                buf, start, count, bufSize,
+                scriptRunStart, scriptRunEnd,
+                ctx->paint.hyphenEdit, hbFont);
+
             hb_shape(hbFont, buffer, features.empty() ? NULL : &features[0], features.size());
             unsigned int numGlyphs;
             hb_glyph_info_t* info = hb_buffer_get_glyph_infos(buffer, &numGlyphs);
             hb_glyph_position_t* positions = hb_buffer_get_glyph_positions(buffer, NULL);
+
+            // At this point in the code, the cluster values in the info buffer correspond to the
+            // input characters with some shift. The cluster value clusterStart corresponds to the
+            // first character passed to HarfBuzz, which is at buf[start + scriptRunStart] whose
+            // advance needs to be saved into mAdvances[scriptRunStart]. So cluster values need to
+            // be reduced by (clusterStart - scriptRunStart) to get converted to indices of
+            // mAdvances.
+            const ssize_t clusterOffset = clusterStart - scriptRunStart;
+
             if (numGlyphs)
             {
-                mAdvances[info[0].cluster - start] += letterSpaceHalfLeft;
+                mAdvances[info[0].cluster - clusterOffset] += letterSpaceHalfLeft;
                 x += letterSpaceHalfLeft;
             }
             for (unsigned int i = 0; i < numGlyphs; i++) {
@@ -823,8 +974,8 @@
                         positions[i].x_offset, positions[i].y_offset);
 #endif
                 if (i > 0 && info[i - 1].cluster != info[i].cluster) {
-                    mAdvances[info[i - 1].cluster - start] += letterSpaceHalfRight;
-                    mAdvances[info[i].cluster - start] += letterSpaceHalfLeft;
+                    mAdvances[info[i - 1].cluster - clusterOffset] += letterSpaceHalfRight;
+                    mAdvances[info[i].cluster - clusterOffset] += letterSpaceHalfLeft;
                     x += letterSpace;
                 }
 
@@ -839,20 +990,32 @@
                     xAdvance = roundf(xAdvance);
                 }
                 MinikinRect glyphBounds;
-                ctx->paint.font->GetBounds(&glyphBounds, glyph_ix, ctx->paint);
+                hb_glyph_extents_t extents = {};
+                if (is_color_bitmap_font && hb_font_get_glyph_extents(hbFont, glyph_ix, &extents)) {
+                    // Note that it is technically possible for a TrueType font to have outline and
+                    // embedded bitmap at the same time. We ignore modified bbox of hinted outline
+                    // glyphs in that case.
+                    glyphBounds.mLeft = roundf(HBFixedToFloat(extents.x_bearing));
+                    glyphBounds.mTop = roundf(HBFixedToFloat(-extents.y_bearing));
+                    glyphBounds.mRight = roundf(HBFixedToFloat(extents.x_bearing + extents.width));
+                    glyphBounds.mBottom =
+                            roundf(HBFixedToFloat(-extents.y_bearing - extents.height));
+                } else {
+                    ctx->paint.font->GetBounds(&glyphBounds, glyph_ix, ctx->paint);
+                }
                 glyphBounds.offset(x + xoff, y + yoff);
                 mBounds.join(glyphBounds);
-                if (info[i].cluster - start < count) {
-                    mAdvances[info[i].cluster - start] += xAdvance;
+                if (static_cast<size_t>(info[i].cluster - clusterOffset) < count) {
+                    mAdvances[info[i].cluster - clusterOffset] += xAdvance;
                 } else {
                     ALOGE("cluster %zu (start %zu) out of bounds of count %zu",
-                        info[i].cluster - start, start, count);
+                        info[i].cluster - clusterOffset, start, count);
                 }
                 x += xAdvance;
             }
             if (numGlyphs)
             {
-                mAdvances[info[numGlyphs - 1].cluster - start] += letterSpaceHalfRight;
+                mAdvances[info[numGlyphs - 1].cluster - clusterOffset] += letterSpaceHalfRight;
                 x += letterSpaceHalfRight;
             }
         }
@@ -860,7 +1023,7 @@
     mAdvance = x;
 }
 
-void Layout::appendLayout(Layout* src, size_t start) {
+void Layout::appendLayout(Layout* src, size_t start, float extraAdvance) {
     int fontMapStack[16];
     int* fontMap;
     if (src->mFaces.size() < sizeof(fontMapStack) / sizeof(fontMapStack[0])) {
@@ -884,50 +1047,24 @@
     }
     for (size_t i = 0; i < src->mAdvances.size(); i++) {
         mAdvances[i + start] = src->mAdvances[i];
+        if (i == 0)
+          mAdvances[i + start] += extraAdvance;
     }
     MinikinRect srcBounds(src->mBounds);
     srcBounds.offset(x0, 0);
     mBounds.join(srcBounds);
-    mAdvance += src->mAdvance;
+    mAdvance += src->mAdvance + extraAdvance;
 
     if (fontMap != fontMapStack) {
         delete[] fontMap;
     }
 }
 
-void Layout::draw(minikin::Bitmap* surface, int x0, int y0, float size) const {
-    /*
-    TODO: redo as MinikinPaint settings
-    if (mProps.hasTag(minikinHinting)) {
-        int hintflags = mProps.value(minikinHinting).getIntValue();
-        if (hintflags & 1) load_flags |= FT_LOAD_NO_HINTING;
-        if (hintflags & 2) load_flags |= FT_LOAD_NO_AUTOHINT;
-    }
-    */
-    for (size_t i = 0; i < mGlyphs.size(); i++) {
-        const LayoutGlyph& glyph = mGlyphs[i];
-        MinikinFont* mf = mFaces[glyph.font_ix].font;
-        MinikinFontFreeType* face = static_cast<MinikinFontFreeType*>(mf);
-        GlyphBitmap glyphBitmap;
-        MinikinPaint paint;
-        paint.size = size;
-        bool ok = face->Render(glyph.glyph_id, paint, &glyphBitmap);
-#ifdef VERBOSE_DEBUG
-        ALOGD("glyphBitmap.width=%d, glyphBitmap.height=%d (%d, %d) x=%f, y=%f, ok=%d",
-            glyphBitmap.width, glyphBitmap.height, glyphBitmap.left, glyphBitmap.top, glyph.x, glyph.y, ok);
-#endif
-        if (ok) {
-            surface->drawGlyph(glyphBitmap,
-                x0 + int(floor(glyph.x + 0.5)), y0 + int(floor(glyph.y + 0.5)));
-        }
-    }
-}
-
 size_t Layout::nGlyphs() const {
     return mGlyphs.size();
 }
 
-MinikinFont* Layout::getFont(int i) const {
+const MinikinFont* Layout::getFont(int i) const {
     const LayoutGlyph& glyph = mGlyphs[i];
     return mFaces[glyph.font_ix].font;
 }
@@ -960,15 +1097,22 @@
     memcpy(advances, &mAdvances[0], mAdvances.size() * sizeof(float));
 }
 
-void Layout::getBounds(MinikinRect* bounds) {
+void Layout::getBounds(MinikinRect* bounds) const {
     bounds->set(mBounds);
 }
 
 void Layout::purgeCaches() {
-    AutoMutex _l(gMinikinLock);
+    android::AutoMutex _l(gMinikinLock);
     LayoutCache& layoutCache = LayoutEngine::getInstance().layoutCache;
     layoutCache.clear();
     purgeHbFontCacheLocked();
 }
 
+}  // namespace minikin
+
+// Unable to define the static data member outside of the android namespace.
+// TODO: introduce our own Singleton to drop android namespace.
+namespace android {
+ANDROID_SINGLETON_STATIC_INSTANCE(minikin::LayoutEngine);
 }  // namespace android
+

diff --git a/libs/minikin/LayoutUtils.cpp b/libs/minikin/LayoutUtils.cpp
index 4182682..a3238d4 100644
--- a/libs/minikin/LayoutUtils.cpp
+++ b/libs/minikin/LayoutUtils.cpp

@@ -18,13 +18,24 @@
 
 #include "LayoutUtils.h"
 
+namespace minikin {
+
+const uint16_t CHAR_NBSP = 0x00A0;
+
+/*
+ * Determine whether the code unit is a word space for the purposes of justification.
+ */
+bool isWordSpace(uint16_t code_unit) {
+    return code_unit == ' ' || code_unit == CHAR_NBSP;
+}
+
 /**
  * For the purpose of layout, a word break is a boundary with no
  * kerning or complex script processing. This is necessarily a
  * heuristic, but should be accurate most of the time.
  */
-static bool isWordBreakAfter(int c) {
-    if (c == ' ' || (c >= 0x2000 && c <= 0x200a) || c == 0x3000) {
+static bool isWordBreakAfter(uint16_t c) {
+    if (isWordSpace(c) || (c >= 0x2000 && c <= 0x200a) || c == 0x3000) {
         // spaces
         return true;
     }
@@ -32,7 +43,7 @@
     return false;
 }
 
-static bool isWordBreakBefore(int c) {
+static bool isWordBreakBefore(uint16_t c) {
     // CJK ideographs (and yijing hexagram symbols)
     return isWordBreakAfter(c) || (c >= 0x3400 && c <= 0x9fff);
 }
@@ -74,3 +85,5 @@
     }
     return len;
 }
+
+}  // namespace minikin

diff --git a/libs/minikin/LayoutUtils.h b/libs/minikin/LayoutUtils.h
index 83ddd0a..b89004c 100644
--- a/libs/minikin/LayoutUtils.h
+++ b/libs/minikin/LayoutUtils.h

@@ -19,6 +19,13 @@
 
 #include <stdint.h>
 
+namespace minikin {
+
+/*
+ * Determine whether the code unit is a word space for the purposes of justification.
+ */
+bool isWordSpace(uint16_t code_unit);
+
 /**
  * Return offset of previous word break. It is either < offset or == 0.
  *
@@ -39,4 +46,5 @@
 size_t getNextWordBreakForCache(
         const uint16_t* chars, size_t offset, size_t len);
 
+}  // namespace minikin
 #endif  // MINIKIN_LAYOUT_UTILS_H

diff --git a/libs/minikin/LineBreaker.cpp b/libs/minikin/LineBreaker.cpp
index 2a71f04..e75c7bf 100644
--- a/libs/minikin/LineBreaker.cpp
+++ b/libs/minikin/LineBreaker.cpp

@@ -16,17 +16,19 @@
 
 #define VERBOSE_DEBUG 0
 
+#define LOG_TAG "Minikin"
+
 #include <limits>
 
-#define LOG_TAG "Minikin"
-#include <cutils/log.h>
+#include <log/log.h>
 
+#include "LayoutUtils.h"
 #include <minikin/Layout.h>
 #include <minikin/LineBreaker.h>
 
 using std::vector;
 
-namespace android {
+namespace minikin {
 
 const int CHAR_TAB = 0x0009;
 
@@ -43,6 +45,9 @@
 // probably not the most appropriate method.
 const float LINE_PENALTY_MULTIPLIER = 2.0f;
 
+// Penalty assigned to shrinking the whitespace.
+const float SHRINK_PENALTY_MULTIPLIER = 4.0f;
+
 // Very long words trigger O(n^2) behavior in hyphenation, so we disable hyphenation for
 // unreasonably long words. This is somewhat of a heuristic because extremely long words
 // are possible in some languages. This does mean that very long real words can get
@@ -53,9 +58,12 @@
 // to avoid allocation.
 const size_t MAX_TEXT_BUF_RETAIN = 32678;
 
+// Maximum amount that spaces can shrink, in justified text.
+const float SHRINKABILITY = 1.0 / 3.0;
+
 void LineBreaker::setLocale(const icu::Locale& locale, Hyphenator* hyphenator) {
     mWordBreaker.setLocale(locale);
-
+    mLocale = locale;
     mHyphenator = hyphenator;
 }
 
@@ -65,7 +73,7 @@
     // handle initial break here because addStyleRun may never be called
     mWordBreaker.next();
     mCandidates.clear();
-    Candidate cand = {0, 0, 0.0, 0.0, 0.0, 0.0, 0, 0};
+    Candidate cand = {0, 0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0, HyphenationType::DONT_BREAK};
     mCandidates.push_back(cand);
 
     // reset greedy breaker state
@@ -76,7 +84,9 @@
     mBestBreak = 0;
     mBestScore = SCORE_INFTY;
     mPreBreak = 0;
+    mLastHyphenation = HyphenEdit::NO_EDIT;
     mFirstTabIndex = INT_MAX;
+    mSpaceCount = 0;
 }
 
 void LineBreaker::setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth) {
@@ -97,30 +107,12 @@
             c == 0x205F || c == 0x3000;
 }
 
-// This function determines whether a character is like U+2010 HYPHEN in
-// line breaking and usage: a character immediately after which line breaks
-// are allowed, but words containing it should not be automatically
-// hyphenated. This is a curated set, created by manually inspecting all
-// the characters that have the Unicode line breaking property of BA or HY
-// and seeing which ones are hyphens.
-static bool isLineBreakingHyphen(uint16_t c) {
-    return (c == 0x002D || // HYPHEN-MINUS
-            c == 0x058A || // ARMENIAN HYPHEN
-            c == 0x05BE || // HEBREW PUNCTUATION MAQAF
-            c == 0x1400 || // CANADIAN SYLLABICS HYPHEN
-            c == 0x2010 || // HYPHEN
-            c == 0x2013 || // EN DASH
-            c == 0x2027 || // HYPHENATION POINT
-            c == 0x2E17 || // DOUBLE OBLIQUE HYPHEN
-            c == 0x2E40);  // DOUBLE HYPHEN
-}
-
 // Ordinarily, this method measures the text in the range given. However, when paint
 // is nullptr, it assumes the widths have already been calculated and stored in the
 // width buffer.
 // This method finds the candidate word breaks (using the ICU break iterator) and sends them
 // to addCandidate.
-float LineBreaker::addStyleRun(MinikinPaint* paint, const FontCollection* typeface,
+float LineBreaker::addStyleRun(MinikinPaint* paint, const std::shared_ptr<FontCollection>& typeface,
         FontStyle style, size_t start, size_t end, bool isRtl) {
     float width = 0.0f;
     int bidiFlags = isRtl ? kBidi_Force_RTL : kBidi_Force_LTR;
@@ -136,7 +128,14 @@
             hyphenPenalty *= 4.0; // TODO: Replace with a better value after some testing
         }
 
-        mLinePenalty = std::max(mLinePenalty, hyphenPenalty * LINE_PENALTY_MULTIPLIER);
+        if (mJustified) {
+            // Make hyphenation more aggressive for fully justified text (so that "normal" in
+            // justified mode is the same as "full" in ragged-right).
+            hyphenPenalty *= 0.25;
+        } else {
+            // Line penalty is zero for justified text.
+            mLinePenalty = std::max(mLinePenalty, hyphenPenalty * LINE_PENALTY_MULTIPLIER);
+        }
     }
 
     size_t current = (size_t)mWordBreaker.current();
@@ -144,7 +143,7 @@
     size_t lastBreak = start;
     ParaWidth lastBreakWidth = mWidth;
     ParaWidth postBreak = mWidth;
-    bool temporarilySkipHyphenation = false;
+    size_t postSpaceCount = mSpaceCount;
     for (size_t i = start; i < end; i++) {
         uint16_t c = mTextBuf[i];
         if (c == CHAR_TAB) {
@@ -155,28 +154,31 @@
             // fall back to greedy; other modes don't know how to deal with tabs
             mStrategy = kBreakStrategy_Greedy;
         } else {
+            if (isWordSpace(c)) mSpaceCount += 1;
             mWidth += mCharWidths[i];
             if (!isLineEndSpace(c)) {
                 postBreak = mWidth;
+                postSpaceCount = mSpaceCount;
                 afterWord = i + 1;
             }
         }
         if (i + 1 == current) {
-            // TODO: Add a new type of HyphenEdit for breaks whose hyphen already exists, so
-            // we can pass the whole word down to Hyphenator like the soft hyphen case.
-            bool wordEndsInHyphen = isLineBreakingHyphen(c);
             size_t wordStart = mWordBreaker.wordStart();
             size_t wordEnd = mWordBreaker.wordEnd();
             if (paint != nullptr && mHyphenator != nullptr &&
                     mHyphenationFrequency != kHyphenationFrequency_None &&
-                    !wordEndsInHyphen && !temporarilySkipHyphenation &&
                     wordStart >= start && wordEnd > wordStart &&
                     wordEnd - wordStart <= LONGEST_HYPHENATED_WORD) {
-                mHyphenator->hyphenate(&mHyphBuf, &mTextBuf[wordStart], wordEnd - wordStart);
+                mHyphenator->hyphenate(&mHyphBuf,
+                        &mTextBuf[wordStart],
+                        wordEnd - wordStart,
+                        mLocale);
 #if VERBOSE_DEBUG
                 std::string hyphenatedString;
                 for (size_t j = wordStart; j < wordEnd; j++) {
-                    if (mHyphBuf[j - wordStart]) hyphenatedString.push_back('-');
+                    if (mHyphBuf[j - wordStart] == HyphenationType::BREAK_AND_INSERT_HYPHEN) {
+                        hyphenatedString.push_back('-');
+                    }
                     // Note: only works with ASCII, should do UTF-8 conversion here
                     hyphenatedString.push_back(buffer()[j]);
                 }
@@ -185,31 +187,33 @@
 
                 // measure hyphenated substrings
                 for (size_t j = wordStart; j < wordEnd; j++) {
-                    uint8_t hyph = mHyphBuf[j - wordStart];
-                    if (hyph) {
-                        paint->hyphenEdit = hyph;
-
+                    HyphenationType hyph = mHyphBuf[j - wordStart];
+                    if (hyph != HyphenationType::DONT_BREAK) {
+                        paint->hyphenEdit = HyphenEdit::editForThisLine(hyph);
                         const float firstPartWidth = Layout::measureText(mTextBuf.data(),
                                 lastBreak, j - lastBreak, mTextBuf.size(), bidiFlags, style,
                                 *paint, typeface, nullptr);
                         ParaWidth hyphPostBreak = lastBreakWidth + firstPartWidth;
-                        paint->hyphenEdit = 0;
 
-                        const float secondPartWith = Layout::measureText(mTextBuf.data(), j,
+                        paint->hyphenEdit = HyphenEdit::editForNextLine(hyph);
+                        const float secondPartWidth = Layout::measureText(mTextBuf.data(), j,
                                 afterWord - j, mTextBuf.size(), bidiFlags, style, *paint,
                                 typeface, nullptr);
-                        ParaWidth hyphPreBreak = postBreak - secondPartWith;
-                        addWordBreak(j, hyphPreBreak, hyphPostBreak, hyphenPenalty, hyph);
+                        ParaWidth hyphPreBreak = postBreak - secondPartWidth;
+
+                        addWordBreak(j, hyphPreBreak, hyphPostBreak, postSpaceCount, postSpaceCount,
+                                hyphenPenalty, hyph);
+
+                        paint->hyphenEdit = HyphenEdit::NO_EDIT;
                     }
                 }
             }
-            // Skip hyphenating the next word if and only if the present word ends in a hyphen
-            temporarilySkipHyphenation = wordEndsInHyphen;
 
             // Skip break for zero-width characters inside replacement span
             if (paint != nullptr || current == end || mCharWidths[current] > 0) {
                 float penalty = hyphenPenalty * mWordBreaker.breakBadness();
-                addWordBreak(current, mWidth, postBreak, penalty, 0);
+                addWordBreak(current, mWidth, postBreak, mSpaceCount, postSpaceCount, penalty,
+                        HyphenationType::DONT_BREAK);
             }
             lastBreak = current;
             lastBreakWidth = mWidth;
@@ -223,7 +227,7 @@
 // add a word break (possibly for a hyphenated fragment), and add desperate breaks if
 // needed (ie when word exceeds current line width)
 void LineBreaker::addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak,
-        float penalty, uint8_t hyph) {
+        size_t preSpaceCount, size_t postSpaceCount, float penalty, HyphenationType hyph) {
     Candidate cand;
     ParaWidth width = mCandidates.back().preBreak;
     if (postBreak - width > currentLineWidth()) {
@@ -238,8 +242,11 @@
                 cand.offset = i;
                 cand.preBreak = width;
                 cand.postBreak = width;
+                // postSpaceCount doesn't include trailing spaces
+                cand.preSpaceCount = postSpaceCount;
+                cand.postSpaceCount = postSpaceCount;
                 cand.penalty = SCORE_DESPERATE;
-                cand.hyphenEdit = 0;
+                cand.hyphenType = HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN;
 #if VERBOSE_DEBUG
                 ALOGD("desperate cand: %zd %g:%g",
                         mCandidates.size(), cand.postBreak, cand.preBreak);
@@ -254,42 +261,79 @@
     cand.preBreak = preBreak;
     cand.postBreak = postBreak;
     cand.penalty = penalty;
-    cand.hyphenEdit = hyph;
+    cand.preSpaceCount = preSpaceCount;
+    cand.postSpaceCount = postSpaceCount;
+    cand.hyphenType = hyph;
 #if VERBOSE_DEBUG
     ALOGD("cand: %zd %g:%g", mCandidates.size(), cand.postBreak, cand.preBreak);
 #endif
     addCandidate(cand);
 }
 
+// Helper method for addCandidate()
+void LineBreaker::pushGreedyBreak() {
+    const Candidate& bestCandidate = mCandidates[mBestBreak];
+    pushBreak(bestCandidate.offset, bestCandidate.postBreak - mPreBreak,
+            mLastHyphenation | HyphenEdit::editForThisLine(bestCandidate.hyphenType));
+    mBestScore = SCORE_INFTY;
+#if VERBOSE_DEBUG
+    ALOGD("break: %d %g", mBreaks.back(), mWidths.back());
+#endif
+    mLastBreak = mBestBreak;
+    mPreBreak = bestCandidate.preBreak;
+    mLastHyphenation = HyphenEdit::editForNextLine(bestCandidate.hyphenType);
+}
+
 // TODO performance: could avoid populating mCandidates if greedy only
 void LineBreaker::addCandidate(Candidate cand) {
-    size_t candIndex = mCandidates.size();
+    const size_t candIndex = mCandidates.size();
     mCandidates.push_back(cand);
+
+    // mLastBreak is the index of the last line break we decided to do in mCandidates,
+    // and mPreBreak is its preBreak value. mBestBreak is the index of the best line breaking candidate
+    // we have found since then, and mBestScore is its penalty.
     if (cand.postBreak - mPreBreak > currentLineWidth()) {
         // This break would create an overfull line, pick the best break and break there (greedy)
         if (mBestBreak == mLastBreak) {
+            // No good break has been found since last break. Break here.
             mBestBreak = candIndex;
         }
-        pushBreak(mCandidates[mBestBreak].offset, mCandidates[mBestBreak].postBreak - mPreBreak,
-                mCandidates[mBestBreak].hyphenEdit);
-        mBestScore = SCORE_INFTY;
-#if VERBOSE_DEBUG
-        ALOGD("break: %d %g", mBreaks.back(), mWidths.back());
-#endif
-        mLastBreak = mBestBreak;
-        mPreBreak = mCandidates[mBestBreak].preBreak;
+        pushGreedyBreak();
     }
+
+    while (mLastBreak != candIndex && cand.postBreak - mPreBreak > currentLineWidth()) {
+        // We should rarely come here. But if we are here, we have broken the line, but the
+        // remaining part still doesn't fit. We now need to break at the second best place after the
+        // last break, but we have not kept that information, so we need to go back and find it.
+        //
+        // In some really rare cases, postBreak - preBreak of a candidate itself may be over the
+        // current line width. We protect ourselves against an infinite loop in that case by
+        // checking that we have not broken the line at this candidate already.
+        for (size_t i = mLastBreak + 1; i < candIndex; i++) {
+            const float penalty = mCandidates[i].penalty;
+            if (penalty <= mBestScore) {
+                mBestBreak = i;
+                mBestScore = penalty;
+            }
+        }
+        if (mBestBreak == mLastBreak) {
+            // We didn't find anything good. Break here.
+            mBestBreak = candIndex;
+        }
+        pushGreedyBreak();
+    }
+
     if (cand.penalty <= mBestScore) {
         mBestBreak = candIndex;
         mBestScore = cand.penalty;
     }
 }
 
-void LineBreaker::pushBreak(int offset, float width, uint8_t hyph) {
+void LineBreaker::pushBreak(int offset, float width, uint8_t hyphenEdit) {
     mBreaks.push_back(offset);
     mWidths.push_back(width);
     int flags = (mFirstTabIndex < mBreaks.back()) << kTab_Shift;
-    flags |= hyph;
+    flags |= hyphenEdit;
     mFlags.push_back(flags);
     mFirstTabIndex = INT_MAX;
 }
@@ -300,6 +344,18 @@
     addStyleRun(nullptr, nullptr, FontStyle(), start, end, false);
 }
 
+// Get the width of a space. May return 0 if there are no spaces.
+// Note: if there are multiple different widths for spaces (for example, because of mixing of
+// fonts), only the width of the first word space found in the text buffer is returned.
+float LineBreaker::getSpaceWidth() const {
+    for (size_t i = 0; i < mTextBuf.size(); i++) {
+        if (isWordSpace(mTextBuf[i])) {
+            return mCharWidths[i];
+        }
+    }
+    return 0.0f;
+}
+
 float LineBreaker::currentLineWidth() const {
     return mLineWidths.getLineWidth(mBreaks.size());
 }
@@ -308,7 +364,8 @@
     // All breaks but the last have been added in addCandidate already.
     size_t nCand = mCandidates.size();
     if (nCand == 1 || mLastBreak != nCand - 1) {
-        pushBreak(mCandidates[nCand - 1].offset, mCandidates[nCand - 1].postBreak - mPreBreak, 0);
+        pushBreak(mCandidates[nCand - 1].offset, mCandidates[nCand - 1].postBreak - mPreBreak,
+                mLastHyphenation);
         // don't need to update mBestScore, because we're done
 #if VERBOSE_DEBUG
         ALOGD("final break: %d %g", mBreaks.back(), mWidths.back());
@@ -328,7 +385,11 @@
         prev = mCandidates[i].prev;
         mBreaks.push_back(mCandidates[i].offset);
         mWidths.push_back(mCandidates[i].postBreak - mCandidates[prev].preBreak);
-        mFlags.push_back(mCandidates[i].hyphenEdit);
+        int flags = HyphenEdit::editForThisLine(mCandidates[i].hyphenType);
+        if (prev > 0) {
+            flags |= HyphenEdit::editForNextLine(mCandidates[prev].hyphenType);
+        }
+        mFlags.push_back(flags);
     }
     std::reverse(mBreaks.begin(), mBreaks.end());
     std::reverse(mWidths.begin(), mWidths.end());
@@ -339,6 +400,10 @@
     size_t active = 0;
     size_t nCand = mCandidates.size();
     float width = mLineWidths.getLineWidth(0);
+    float shortLineFactor = mJustified ? 0.75f : 0.5f;
+    float maxShrink = mJustified ? SHRINKABILITY * getSpaceWidth() : 0.0f;
+
+    // "i" iterates through candidates for the end of the line.
     for (size_t i = 1; i < nCand; i++) {
         bool atEnd = i == nCand - 1;
         float best = SCORE_INFTY;
@@ -352,6 +417,7 @@
         ParaWidth leftEdge = mCandidates[i].postBreak - width;
         float bestHope = 0;
 
+        // "j" iterates through candidates for the beginning of the line.
         for (size_t j = active; j < i; j++) {
             if (!isRectangle) {
                 size_t lineNumber = mCandidates[j].lineNumber;
@@ -376,13 +442,24 @@
             // breaks are considered.
             float widthScore = 0.0f;
             float additionalPenalty = 0.0f;
-            if (delta < 0) {
+            if ((atEnd || !mJustified) && delta < 0) {
                 widthScore = SCORE_OVERFULL;
             } else if (atEnd && mStrategy != kBreakStrategy_Balanced) {
                 // increase penalty for hyphen on last line
                 additionalPenalty = LAST_LINE_PENALTY_MULTIPLIER * mCandidates[j].penalty;
+                // Penalize very short (< 1 - shortLineFactor of total width) lines.
+                float underfill = delta - shortLineFactor * width;
+                widthScore = underfill > 0 ? underfill * underfill : 0;
             } else {
                 widthScore = delta * delta;
+                if (delta < 0) {
+                    if (-delta < maxShrink *
+                            (mCandidates[i].postSpaceCount - mCandidates[j].preSpaceCount)) {
+                        widthScore *= SHRINK_PENALTY_MULTIPLIER;
+                    } else {
+                        widthScore = SCORE_OVERFULL;
+                    }
+                }
             }
 
             if (delta < 0) {
@@ -439,6 +516,7 @@
     mStrategy = kBreakStrategy_Greedy;
     mHyphenationFrequency = kHyphenationFrequency_Normal;
     mLinePenalty = 0.0f;
+    mJustified = false;
 }
 
-}  // namespace android
+}  // namespace minikin

diff --git a/libs/minikin/Measurement.cpp b/libs/minikin/Measurement.cpp
index 1ba6678..f0d15f2 100644
--- a/libs/minikin/Measurement.cpp
+++ b/libs/minikin/Measurement.cpp

@@ -15,15 +15,16 @@
  */
 
 #define LOG_TAG "Minikin"
-#include <cutils/log.h>
 
 #include <cmath>
 #include <unicode/uchar.h>
 
+#include <android/log.h>
+
 #include <minikin/GraphemeBreak.h>
 #include <minikin/Measurement.h>
 
-namespace android {
+namespace minikin {
 
 // These could be considered helper methods of layout, but need only be loosely coupled, so
 // are separate.
@@ -53,7 +54,8 @@
         int numGraphemeClustersAfter = 0;
         for (size_t i = lastCluster; i < nextCluster; i++) {
             bool isAfter = i >= offset;
-            if (GraphemeBreak::isGraphemeBreak(buf, start, count, i)) {
+            if (GraphemeBreak::isGraphemeBreak(
+                    advances + (start - layoutStart), buf, start, count, i)) {
                 numGraphemeClusters++;
                 if (isAfter) {
                     numGraphemeClustersAfter++;
@@ -85,7 +87,7 @@
     float x = 0.0f, xLastClusterStart = 0.0f, xSearchStart = 0.0f;
     size_t lastClusterStart = start, searchStart = start;
     for (size_t i = start; i < start + count; i++) {
-        if (GraphemeBreak::isGraphemeBreak(buf, start, count, i)) {
+        if (GraphemeBreak::isGraphemeBreak(advances, buf, start, count, i)) {
             searchStart = lastClusterStart;
             xSearchStart = xLastClusterStart;
         }
@@ -102,7 +104,7 @@
     size_t best = searchStart;
     float bestDist = FLT_MAX;
     for (size_t i = searchStart; i <= start + count; i++) {
-        if (GraphemeBreak::isGraphemeBreak(buf, start, count, i)) {
+        if (GraphemeBreak::isGraphemeBreak(advances, buf, start, count, i)) {
             // "getRunAdvance(layout, buf, start, count, i) - advance" but more efficient
             float delta = getRunAdvance(advances, buf, start, searchStart, count - searchStart, i)
 
@@ -119,4 +121,4 @@
     return best;
 }
 
-}
+}  // namespace minikin

diff --git a/libs/minikin/MinikinFont.cpp b/libs/minikin/MinikinFont.cpp
index ef42e9b..6bf6a4a 100644
--- a/libs/minikin/MinikinFont.cpp
+++ b/libs/minikin/MinikinFont.cpp

@@ -16,11 +16,13 @@
 
 #include <minikin/MinikinFont.h>
 #include "HbFontCache.h"
+#include "MinikinInternal.h"
 
-namespace android {
+namespace minikin {
 
 MinikinFont::~MinikinFont() {
+    android::AutoMutex _l(gMinikinLock);
     purgeHbFontLocked(this);
 }
 
-}  // namespace android
+}  // namespace minikin

diff --git a/libs/minikin/MinikinFontFreeType.cpp b/libs/minikin/MinikinFontFreeType.cpp
deleted file mode 100644
index 4a1b115..0000000
--- a/libs/minikin/MinikinFontFreeType.cpp
+++ /dev/null

@@ -1,100 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// Implementation of MinikinFont abstraction specialized for FreeType
-
-#include <stdint.h>
-
-#include <ft2build.h>
-#include FT_FREETYPE_H
-#include FT_TRUETYPE_TABLES_H
-#include FT_ADVANCES_H
-
-#include <minikin/MinikinFontFreeType.h>
-
-namespace android {
-
-int32_t MinikinFontFreeType::sIdCounter = 0;
-
-MinikinFontFreeType::MinikinFontFreeType(FT_Face typeface) : 
-    MinikinFont(sIdCounter++),
-    mTypeface(typeface) {
-}
-
-MinikinFontFreeType::~MinikinFontFreeType() {
-    FT_Done_Face(mTypeface);
-}
-
-float MinikinFontFreeType::GetHorizontalAdvance(uint32_t glyph_id,
-    const MinikinPaint &paint) const {
-    FT_Set_Pixel_Sizes(mTypeface, 0, paint.size);
-    FT_UInt32 flags = FT_LOAD_DEFAULT;  // TODO: respect hinting settings
-    FT_Fixed advance;
-    FT_Get_Advance(mTypeface, glyph_id, flags, &advance);
-    return advance * (1.0 / 65536);
-}
-
-void MinikinFontFreeType::GetBounds(MinikinRect* /* bounds */, uint32_t /* glyph_id*/,
-        const MinikinPaint& /* paint */) const {
-    // TODO: NYI
-}
-
-const void* MinikinFontFreeType::GetTable(uint32_t tag, size_t* size, MinikinDestroyFunc* destroy) {
-    FT_ULong ftsize = 0;
-    FT_Error error = FT_Load_Sfnt_Table(mTypeface, tag, 0, nullptr, &ftsize);
-    if (error != 0) {
-        return nullptr;
-    }
-    FT_Byte* buf = reinterpret_cast<FT_Byte*>(malloc(ftsize));
-    if (buf == nullptr) {
-        return nullptr;
-    }
-    error = FT_Load_Sfnt_Table(mTypeface, tag, 0, buf, &ftsize);
-    if (error != 0) {
-        free(buf);
-        return nullptr;
-    }
-    *destroy = free;
-    *size = ftsize;
-    return buf;
-}
-
-bool MinikinFontFreeType::Render(uint32_t glyph_id, const MinikinPaint& /* paint */,
-        GlyphBitmap *result) {
-    FT_Error error;
-    FT_Int32 load_flags = FT_LOAD_DEFAULT;  // TODO: respect hinting settings
-    error = FT_Load_Glyph(mTypeface, glyph_id, load_flags);
-    if (error != 0) {
-        return false;
-    }
-    error = FT_Render_Glyph(mTypeface->glyph, FT_RENDER_MODE_NORMAL);
-    if (error != 0) {
-        return false;
-    }
-    FT_Bitmap &bitmap = mTypeface->glyph->bitmap;
-    result->buffer = bitmap.buffer;
-    result->width = bitmap.width;
-    result->height = bitmap.rows;
-    result->left = mTypeface->glyph->bitmap_left;
-    result->top = mTypeface->glyph->bitmap_top;
-    return true;
-}
-
-MinikinFontFreeType* MinikinFontFreeType::GetFreeType() {
-    return this;
-}
-
-}  // namespace android

diff --git a/libs/minikin/MinikinInternal.cpp b/libs/minikin/MinikinInternal.cpp
index 5cb9491..cfa43bc 100644
--- a/libs/minikin/MinikinInternal.cpp
+++ b/libs/minikin/MinikinInternal.cpp

@@ -15,16 +15,16 @@
  */
 
 // Definitions internal to Minikin
+#define LOG_TAG "Minikin"
 
 #include "MinikinInternal.h"
 #include "HbFontCache.h"
-#include "generated/UnicodeData.h"
 
-#include <cutils/log.h>
+#include <log/log.h>
 
-namespace android {
+namespace minikin {
 
-Mutex gMinikinLock;
+android::Mutex gMinikinLock;
 
 void assertMinikinLocked() {
 #ifdef ENABLE_RACE_DETECTION
@@ -32,59 +32,7 @@
 #endif
 }
 
-bool isEmoji(uint32_t c) {
-    // U+2695 U+2640 U+2642 are not in emoji category in Unicode 9 but they are now emoji category.
-    // TODO: remove once emoji database is updated.
-    if (c == 0x2695 || c == 0x2640 || c == 0x2642) {
-        return true;
-    }
-    const size_t length = sizeof(generated::EMOJI_LIST) / sizeof(generated::EMOJI_LIST[0]);
-    return std::binary_search(generated::EMOJI_LIST, generated::EMOJI_LIST + length, c);
-}
-
-// Based on Modifiers from http://www.unicode.org/L2/L2016/16011-data-file.txt
-bool isEmojiModifier(uint32_t c) {
-    return (0x1F3FB <= c && c <= 0x1F3FF);
-}
-
-// Based on Emoji_Modifier_Base from
-// http://www.unicode.org/Public/emoji/3.0/emoji-data.txt
-bool isEmojiBase(uint32_t c) {
-    if (0x261D <= c && c <= 0x270D) {
-        return (c == 0x261D || c == 0x26F9 || (0x270A <= c && c <= 0x270D));
-    } else if (0x1F385 <= c && c <= 0x1F93E) {
-        return (c == 0x1F385
-                || (0x1F3C3 <= c && c <= 0x1F3C4)
-                || (0x1F3CA <= c && c <= 0x1F3CB)
-                || (0x1F442 <= c && c <= 0x1F443)
-                || (0x1F446 <= c && c <= 0x1F450)
-                || (0x1F466 <= c && c <= 0x1F469)
-                || c == 0x1F46E
-                || (0x1F470 <= c && c <= 0x1F478)
-                || c == 0x1F47C
-                || (0x1F481 <= c && c <= 0x1F483)
-                || (0x1F485 <= c && c <= 0x1F487)
-                || c == 0x1F4AA
-                || c == 0x1F575
-                || c == 0x1F57A
-                || c == 0x1F590
-                || (0x1F595 <= c && c <= 0x1F596)
-                || (0x1F645 <= c && c <= 0x1F647)
-                || (0x1F64B <= c && c <= 0x1F64F)
-                || c == 0x1F6A3
-                || (0x1F6B4 <= c && c <= 0x1F6B6)
-                || c == 0x1F6C0
-                || (0x1F918 <= c && c <= 0x1F91E)
-                || c == 0x1F926
-                || c == 0x1F930
-                || (0x1F933 <= c && c <= 0x1F939)
-                || (0x1F93B <= c && c <= 0x1F93E));
-    } else {
-        return false;
-    }
-}
-
-hb_blob_t* getFontTable(MinikinFont* minikinFont, uint32_t tag) {
+hb_blob_t* getFontTable(const MinikinFont* minikinFont, uint32_t tag) {
     assertMinikinLocked();
     hb_font_t* font = getHbFontLocked(minikinFont);
     hb_face_t* face = hb_font_get_face(font);
@@ -93,4 +41,26 @@
     return blob;
 }
 
+inline static bool isBMPVariationSelector(uint32_t codePoint) {
+    return VS1 <= codePoint && codePoint <= VS16;
 }
+
+inline static bool isVariationSelectorSupplement(uint32_t codePoint) {
+    return VS17 <= codePoint && codePoint <= VS256;
+}
+
+uint16_t getVsIndex(uint32_t codePoint) {
+    if (isBMPVariationSelector(codePoint)) {
+        return codePoint - VS1;
+    } else if (isVariationSelectorSupplement(codePoint)) {
+        return codePoint - VS17 + 16;
+    } else {
+        return INVALID_VS_INDEX;
+    }
+}
+
+bool isVariationSelector(uint32_t codePoint) {
+    return isBMPVariationSelector(codePoint) || isVariationSelectorSupplement(codePoint);
+}
+
+}  // namespace minikin

diff --git a/libs/minikin/MinikinInternal.h b/libs/minikin/MinikinInternal.h
index 88cc947..a59e55d 100644
--- a/libs/minikin/MinikinInternal.h
+++ b/libs/minikin/MinikinInternal.h

@@ -25,34 +25,44 @@
 
 #include <minikin/MinikinFont.h>
 
-namespace android {
+namespace minikin {
 
 // All external Minikin interfaces are designed to be thread-safe.
 // Presently, that's implemented by through a global lock, and having
 // all external interfaces take that lock.
 
-extern Mutex gMinikinLock;
+extern android::Mutex gMinikinLock;
 
 // Aborts if gMinikinLock is not acquired. Do nothing on the release build.
 void assertMinikinLocked();
 
-// Returns true if c is emoji.
-bool isEmoji(uint32_t c);
+hb_blob_t* getFontTable(const MinikinFont* minikinFont, uint32_t tag);
 
-// Returns true if c is emoji modifier base.
-bool isEmojiBase(uint32_t c);
+constexpr uint32_t MAX_UNICODE_CODE_POINT = 0x10FFFF;
 
-// Returns true if c is emoji modifier.
-bool isEmojiModifier(uint32_t c);
+constexpr uint32_t VS1 = 0xFE00;
+constexpr uint32_t VS16 = 0xFE0F;
+constexpr uint32_t VS17 = 0xE0100;
+constexpr uint32_t VS256 = 0xE01EF;
 
-hb_blob_t* getFontTable(MinikinFont* minikinFont, uint32_t tag);
+// Returns variation selector index. This is one unit less than the variation selector number. For
+// example, VARIATION SELECTOR-25 maps to 24.
+// [0x00-0x0F] for U+FE00..U+FE0F
+// [0x10-0xFF] for U+E0100..U+E01EF
+// INVALID_VS_INDEX for other input.
+constexpr uint16_t INVALID_VS_INDEX = 0xFFFF;
+uint16_t getVsIndex(uint32_t codePoint);
+
+// Returns true if the code point is a variation selector.
+// Note that this function returns false for Mongolian free variation selectors.
+bool isVariationSelector(uint32_t codePoint);
 
 // An RAII wrapper for hb_blob_t
 class HbBlob {
 public:
     // Takes ownership of hb_blob_t object, caller is no longer
     // responsible for calling hb_blob_destroy().
-    HbBlob(hb_blob_t* blob) : mBlob(blob) {
+    explicit HbBlob(hb_blob_t* blob) : mBlob(blob) {
     }
 
     ~HbBlob() {
@@ -65,15 +75,13 @@
     }
 
     size_t size() const {
-        unsigned int length = 0;
-        hb_blob_get_data(mBlob, &length);
-        return (size_t)length;
+        return (size_t)hb_blob_get_length(mBlob);
     }
 
 private:
     hb_blob_t* mBlob;
 };
 
-}
+}  // namespace minikin
 
 #endif  // MINIKIN_INTERNAL_H

diff --git a/libs/minikin/SparseBitSet.cpp b/libs/minikin/SparseBitSet.cpp
index de07914..9fad6a0 100644
--- a/libs/minikin/SparseBitSet.cpp
+++ b/libs/minikin/SparseBitSet.cpp

@@ -14,21 +14,19 @@
  * limitations under the License.
  */
 
-#include <cutils/log.h>
+#define LOG_TAG "SparseBitSet"
+
 #include <stddef.h>
 #include <string.h>
+
+#include <log/log.h>
+
 #include <minikin/SparseBitSet.h>
 
-namespace android {
+namespace minikin {
 
 const uint32_t SparseBitSet::kNotFound;
 
-void SparseBitSet::clear() {
-    mMaxVal = 0;
-    mIndices.reset();
-    mBitmaps.reset();
-}
-
 uint32_t SparseBitSet::calcNumPages(const uint32_t* ranges, size_t nRanges) {
     bool haveZeroPage = false;
     uint32_t nonzeroPageEnd = 0;
@@ -55,17 +53,16 @@
 
 void SparseBitSet::initFromRanges(const uint32_t* ranges, size_t nRanges) {
     if (nRanges == 0) {
-        mMaxVal = 0;
-        mIndices.reset();
-        mBitmaps.reset();
         return;
     }
-    mMaxVal = ranges[nRanges * 2 - 1];
-    size_t indexSize = (mMaxVal + kPageMask) >> kLogValuesPerPage;
-    mIndices.reset(new uint32_t[indexSize]);
+    const uint32_t maxVal = ranges[nRanges * 2 - 1];
+    if (maxVal >= kMaximumCapacity) {
+        return;
+    }
+    mMaxVal = maxVal;
+    mIndices.reset(new uint16_t[(mMaxVal + kPageMask) >> kLogValuesPerPage]);
     uint32_t nPages = calcNumPages(ranges, nRanges);
-    mBitmaps.reset(new element[nPages << (kLogValuesPerPage - kLogBitsPerEl)]);
-    memset(mBitmaps.get(), 0, nPages << (kLogValuesPerPage - 3));
+    mBitmaps.reset(new element[nPages << (kLogValuesPerPage - kLogBitsPerEl)]());
     mZeroPageIndex = noZeroPage;
     uint32_t nonzeroPageEnd = 0;
     uint32_t currentPage = 0;
@@ -131,7 +128,7 @@
     }
     uint32_t maxPage = (mMaxVal + kPageMask) >> kLogValuesPerPage;
     for (uint32_t page = fromPage + 1; page < maxPage; page++) {
-        uint32_t index = mIndices[page];
+        uint16_t index = mIndices[page];
         if (index == mZeroPageIndex) {
             continue;
         }
@@ -146,4 +143,4 @@
     return kNotFound;
 }
 
-}  // namespace android
+}  // namespace minikin

diff --git a/libs/minikin/WordBreaker.cpp b/libs/minikin/WordBreaker.cpp
index 38f03ca..16edca7 100644
--- a/libs/minikin/WordBreaker.cpp
+++ b/libs/minikin/WordBreaker.cpp

@@ -15,15 +15,18 @@
  */
 
 #define LOG_TAG "Minikin"
-#include <cutils/log.h>
 
+#include <android/log.h>
+
+#include <minikin/Emoji.h>
+#include <minikin/Hyphenator.h>
 #include <minikin/WordBreaker.h>
 #include "MinikinInternal.h"
 
 #include <unicode/uchar.h>
 #include <unicode/utf16.h>
 
-namespace android {
+namespace minikin {
 
 const uint32_t CHAR_SOFT_HYPHEN = 0x00AD;
 const uint32_t CHAR_ZWJ = 0x200D;
@@ -56,14 +59,6 @@
     return mCurrent;
 }
 
-enum ScanState {
-    START,
-    SAW_AT,
-    SAW_COLON,
-    SAW_COLON_SLASH,
-    SAW_COLON_SLASH_SLASH,
-};
-
 /**
  * Determine whether a line break at position i within the buffer buf is valid. This
  * represents customization beyond the ICU behavior, because plain ICU provides some
@@ -73,7 +68,8 @@
     uint32_t codePoint;
     size_t prev_offset = i;
     U16_PREV(buf, 0, prev_offset, codePoint);
-    if (codePoint == CHAR_SOFT_HYPHEN) {
+    // Do not break on hard or soft hyphens. These are handled by automatic hyphenation.
+    if (Hyphenator::isLineBreakingHyphen(codePoint) || codePoint == CHAR_SOFT_HYPHEN) {
         return false;
     }
     // For Myanmar kinzi sequences, created by <consonant, ASAT, VIRAMA, consonant>. This is to go
@@ -88,23 +84,13 @@
     size_t next_offset = i;
     U16_NEXT(buf, next_offset, bufEnd, next_codepoint);
 
-    // Proposed change to LB24 from http://www.unicode.org/L2/L2016/16043r-line-break-pr-po.txt
-    // (AL | HL) × (PR | PO)
-    int32_t lineBreak = u_getIntPropertyValue(codePoint, UCHAR_LINE_BREAK);
-    if (lineBreak == U_LB_ALPHABETIC || lineBreak == U_LB_HEBREW_LETTER) {
-        lineBreak = u_getIntPropertyValue(next_codepoint, UCHAR_LINE_BREAK);
-        if (lineBreak == U_LB_PREFIX_NUMERIC || lineBreak == U_LB_POSTFIX_NUMERIC) {
-            return false;
-        }
-    }
-
-    // Emoji ZWJ sequences.
+    // Rule LB8 for Emoji ZWJ sequences. We need to do this ourselves since we may have fresher
+    // emoji data than ICU does.
     if (codePoint == CHAR_ZWJ && isEmoji(next_codepoint)) {
         return false;
     }
 
-    // Proposed Rule LB30b from http://www.unicode.org/L2/L2016/16011r3-break-prop-emoji.pdf
-    // EB x EM
+    // Rule LB30b. We need to do this ourselves since we may have fresher emoji data than ICU does.
     if (isEmojiModifier(next_codepoint)) {
         if (codePoint == 0xFE0F && prev_offset > 0) {
             // skip over emoji variation selector
@@ -117,6 +103,22 @@
     return true;
 }
 
+// Customized iteratorNext that takes care of both resets and our modifications
+// to ICU's behavior.
+int32_t WordBreaker::iteratorNext() {
+    int32_t result;
+    do {
+        if (mIteratorWasReset) {
+            result = mBreakIterator->following(mCurrent);
+            mIteratorWasReset = false;
+        } else {
+            result = mBreakIterator->next();
+        }
+    } while (!(result == icu::BreakIterator::DONE || (size_t)result == mTextSize
+            || isBreakValid(mText, mTextSize, result)));
+    return result;
+}
+
 // Chicago Manual of Style recommends breaking after these characters in URLs and email addresses
 static bool breakAfter(uint16_t c) {
     return c == ':' || c == '=' || c == '&';
@@ -128,9 +130,15 @@
             || c == '%' || c == '=' || c == '&';
 }
 
-ssize_t WordBreaker::next() {
-    mLast = mCurrent;
+enum ScanState {
+    START,
+    SAW_AT,
+    SAW_COLON,
+    SAW_COLON_SLASH,
+    SAW_COLON_SLASH_SLASH,
+};
 
+void WordBreaker::detectEmailOrUrl() {
     // scan forward from current ICU position for email address or URL
     if (mLast >= mScanOffset) {
         ScanState state = START;
@@ -155,6 +163,9 @@
         }
         if (state == SAW_AT || state == SAW_COLON_SLASH_SLASH) {
             if (!mBreakIterator->isBoundary(i)) {
+                // If there are combining marks or such at the end of the URL or the email address,
+                // consider them a part of the URL or the email, and skip to the next actual
+                // boundary.
                 i = mBreakIterator->following(i);
             }
             mInEmailOrUrl = true;
@@ -164,48 +175,46 @@
         }
         mScanOffset = i;
     }
+}
 
-    if (mInEmailOrUrl) {
-        // special rules for email addresses and URL's as per Chicago Manual of Style (16th ed.)
-        uint16_t lastChar = mText[mLast];
-        ssize_t i;
-        for (i = mLast + 1; i < mScanOffset; i++) {
-            if (breakAfter(lastChar)) {
-                break;
-            }
-            // break after double slash
-            if (lastChar == '/' && i >= mLast + 2 && mText[i - 2] == '/') {
-                break;
-            }
-            uint16_t thisChar = mText[i];
-            // never break after hyphen
-            if (lastChar != '-') {
-                if (breakBefore(thisChar)) {
-                    break;
-                }
-                // break before single slash
-                if (thisChar == '/' && lastChar != '/' &&
-                            !(i + 1 < mScanOffset && mText[i + 1] == '/')) {
-                    break;
-                }
-            }
-            lastChar = thisChar;
+ssize_t WordBreaker::findNextBreakInEmailOrUrl() {
+    // special rules for email addresses and URL's as per Chicago Manual of Style (16th ed.)
+    uint16_t lastChar = mText[mLast];
+    ssize_t i;
+    for (i = mLast + 1; i < mScanOffset; i++) {
+        if (breakAfter(lastChar)) {
+            break;
         }
-        mCurrent = i;
-        return mCurrent;
+        // break after double slash
+        if (lastChar == '/' && i >= mLast + 2 && mText[i - 2] == '/') {
+            break;
+        }
+        const uint16_t thisChar = mText[i];
+        // never break after hyphen
+        if (lastChar != '-') {
+            if (breakBefore(thisChar)) {
+                break;
+            }
+            // break before single slash
+            if (thisChar == '/' && lastChar != '/' &&
+                        !(i + 1 < mScanOffset && mText[i + 1] == '/')) {
+                break;
+            }
+        }
+        lastChar = thisChar;
     }
+    return i;
+}
 
-    int32_t result;
-    do {
-        if (mIteratorWasReset) {
-            result = mBreakIterator->following(mCurrent);
-            mIteratorWasReset = false;
-        } else {
-            result = mBreakIterator->next();
-        }
-    } while (result != icu::BreakIterator::DONE && (size_t)result != mTextSize
-            && !isBreakValid(mText, mTextSize, result));
-    mCurrent = (ssize_t)result;
+ssize_t WordBreaker::next() {
+    mLast = mCurrent;
+
+    detectEmailOrUrl();
+    if (mInEmailOrUrl) {
+        mCurrent = findNextBreakInEmailOrUrl();
+    } else {  // Business as usual
+        mCurrent = (ssize_t) iteratorNext();
+    }
     return mCurrent;
 }
 
@@ -218,7 +227,7 @@
         UChar32 c;
         ssize_t ix = result;
         U16_NEXT(mText, ix, mCurrent, c);
-        int32_t lb = u_getIntPropertyValue(c, UCHAR_LINE_BREAK);
+        const int32_t lb = u_getIntPropertyValue(c, UCHAR_LINE_BREAK);
         // strip leading punctuation, defined as OP and QU line breaking classes,
         // see UAX #14
         if (!(lb == U_LB_OPEN_PUNCTUATION || lb == U_LB_QUOTATION)) {
@@ -238,7 +247,7 @@
         UChar32 c;
         ssize_t ix = result;
         U16_PREV(mText, mLast, ix, c);
-        int32_t gc_mask = U_GET_GC_MASK(c);
+        const int32_t gc_mask = U_GET_GC_MASK(c);
         // strip trailing space and punctuation
         if ((gc_mask & (U_GC_ZS_MASK | U_GC_P_MASK)) == 0) {
             break;
@@ -258,4 +267,4 @@
     utext_close(&mUText);
 }
 
-}  // namespace android
+}  // namespace minikin

diff --git a/libs/minikin/unicode_emoji_h_gen.py b/libs/minikin/unicode_emoji_h_gen.py
deleted file mode 100644
index 7233ef6..0000000
--- a/libs/minikin/unicode_emoji_h_gen.py
+++ /dev/null

@@ -1,105 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright (C) 2016 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-"""Generate header file for unicode data."""
-
-import optparse
-import sys
-
-
-UNICODE_EMOJI_TEMPLATE="""
-/* file generated by frameworks/minikin/lib/minikin/Android.mk */
-#ifndef MINIKIN_UNICODE_EMOJI_H
-#define MINIKIN_UNICODE_EMOJI_H
-
-#include <stdint.h>
-
-namespace android {
-namespace generated {
-
-int32_t EMOJI_LIST[] = {
-@@@EMOJI_DATA@@@
-};
-
-}  // namespace generated
-}  // namespace android
-
-#endif  // MINIKIN_UNICODE_EMOJI_H
-"""
-
-
-def _create_opt_parser():
-  parser = optparse.OptionParser()
-  parser.add_option('-i', '--input', type='str', action='store',
-                    help='path to input emoji-data.txt')
-  parser.add_option('-o', '--output', type='str', action='store',
-                    help='path to output UnicodeEmoji.h')
-  return parser
-
-
-def _read_emoji_data(emoji_data_file_path):
-  result = []
-  with open(emoji_data_file_path) as emoji_data_file:
-    for line in emoji_data_file:
-      if '#' in line:
-        line = line[:line.index('#')]  # Drop comments.
-      if not line.strip():
-        continue  # Skip empty line.
-
-      code_points, prop = line.split(';')
-      code_points = code_points.strip()
-      prop = prop.strip()
-      if prop != 'Emoji':
-        break  # Only collect Emoji property code points
-
-      if '..' in code_points:  # code point range
-        cp_start, cp_end = code_points.split('..')
-        result.extend(xrange(int(cp_start, 16), int(cp_end, 16) + 1))
-      else:
-        code_point = int(code_points, 16)
-        result.append(code_point)
-  return result
-
-
-def _generate_header_contents(emoji_list):
-  INDENT = ' ' * 4
-  JOINER = ', '
-
-  hex_list = ['0x%04X' % x for x in emoji_list]
-  lines = []
-  tmp_line = '%s%s' % (INDENT, hex_list[0])
-  for hex_str in hex_list[1:]:
-    if len(tmp_line) + len(JOINER) + len(hex_str) >= 100:
-      lines.append(tmp_line + ',')
-      tmp_line = '%s%s' % (INDENT, hex_str)
-    else:
-      tmp_line = '%s%s%s' % (tmp_line, JOINER, hex_str)
-  lines.append(tmp_line)
-
-  template = UNICODE_EMOJI_TEMPLATE
-  template = template.replace('@@@EMOJI_DATA@@@', '\n'.join(lines))
-  return template
-
-
-if __name__ == '__main__':
-  opt_parser = _create_opt_parser()
-  opts, _ = opt_parser.parse_args()
-
-  emoji_list = _read_emoji_data(opts.input)
-  header = _generate_header_contents(emoji_list)
-  with open(opts.output, 'w') as header_file:
-    header_file.write(header)
-

diff --git a/sample/Android.mk b/sample/Android.mk
deleted file mode 100644
index c4a644d..0000000
--- a/sample/Android.mk
+++ /dev/null

@@ -1,69 +0,0 @@
-# Copyright (C) 2013 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-LOCAL_PATH:= $(call my-dir)
-
-include $(CLEAR_VARS)
-
-LOCAL_MODULE_TAGS := tests
-
-LOCAL_C_INCLUDES += \
-	external/harfbuzz_ng/src \
-	external/freetype/include \
-	frameworks/minikin/include
-
-LOCAL_SRC_FILES:= example.cpp
-
-LOCAL_SHARED_LIBRARIES += \
-	libutils \
-	liblog \
-	libcutils \
-	libharfbuzz_ng \
-	libicuuc \
-	libft2 \
-	libpng \
-	libz \
-	libminikin
-
-LOCAL_MODULE:= minikin_example
-
-include $(BUILD_EXECUTABLE)
-
-
-include $(CLEAR_VARS)
-
-LOCAL_MODULE_TAG := tests
-
-LOCAL_C_INCLUDES += \
-	external/harfbuzz_ng/src \
-	external/freetype/include \
-	frameworks/minikin/include \
-	external/skia/src/core
-
-LOCAL_SRC_FILES:= example_skia.cpp \
-	MinikinSkia.cpp
-
-LOCAL_SHARED_LIBRARIES += \
-	libutils \
-	liblog \
-	libcutils \
-	libharfbuzz_ng \
-	libicuuc \
-	libskia \
-	libminikin \
-	libft2
-
-LOCAL_MODULE:= minikin_skia_example
-
-include $(BUILD_EXECUTABLE)

diff --git a/sample/MinikinSkia.cpp b/sample/MinikinSkia.cpp
deleted file mode 100644
index e2ecde0..0000000
--- a/sample/MinikinSkia.cpp
+++ /dev/null

@@ -1,71 +0,0 @@
-#include <SkTypeface.h>
-#include <SkPaint.h>
-
-#include <minikin/MinikinFont.h>
-#include "MinikinSkia.h"
-
-namespace android {
-
-MinikinFontSkia::MinikinFontSkia(SkTypeface *typeface) :
-    MinikinFont(typeface->uniqueID()),
-    mTypeface(typeface) {
-}
-
-MinikinFontSkia::~MinikinFontSkia() {
-    SkSafeUnref(mTypeface);
-}
-
-static void MinikinFontSkia_SetSkiaPaint(SkTypeface* typeface, SkPaint* skPaint, const MinikinPaint& paint) {
-    skPaint->setTypeface(typeface);
-    skPaint->setTextEncoding(SkPaint::kGlyphID_TextEncoding);
-    // TODO: set more paint parameters from Minikin
-    skPaint->setTextSize(paint.size);
-}
-
-float MinikinFontSkia::GetHorizontalAdvance(uint32_t glyph_id,
-    const MinikinPaint &paint) const {
-    SkPaint skPaint;
-    uint16_t glyph16 = glyph_id;
-    SkScalar skWidth;
-    MinikinFontSkia_SetSkiaPaint(mTypeface, &skPaint, paint);
-    skPaint.getTextWidths(&glyph16, sizeof(glyph16), &skWidth, NULL);
-#ifdef VERBOSE
-    ALOGD("width for typeface %d glyph %d = %f", mTypeface->uniqueID(), glyph_id
-#endif
-    return skWidth;
-}
-
-void MinikinFontSkia::GetBounds(MinikinRect* bounds, uint32_t glyph_id,
-    const MinikinPaint& paint) const {
-    SkPaint skPaint;
-    uint16_t glyph16 = glyph_id;
-    SkRect skBounds;
-    MinikinFontSkia_SetSkiaPaint(mTypeface, &skPaint, paint);
-    skPaint.getTextWidths(&glyph16, sizeof(glyph16), NULL, &skBounds);
-    bounds->mLeft = skBounds.fLeft;
-    bounds->mTop = skBounds.fTop;
-    bounds->mRight = skBounds.fRight;
-    bounds->mBottom = skBounds.fBottom;
-}
-
-const void* MinikinFontSkia::GetTable(uint32_t tag, size_t* size, MinikinDestroyFunc* destroy) {
-    // we don't have a buffer to the font data, copy to own buffer
-    const size_t tableSize = mTypeface->getTableSize(tag);
-    *size = tableSize;
-    if (tableSize == 0) {
-        return nullptr;
-    }
-    void* buf = malloc(tableSize);
-    if (buf == nullptr) {
-        return nullptr;
-    }
-    mTypeface->getTableData(tag, 0, tableSize, buf);
-    *destroy = free;
-    return buf;
-}
-
-SkTypeface *MinikinFontSkia::GetSkTypeface() {
-    return mTypeface;
-}
-
-}

diff --git a/sample/MinikinSkia.h b/sample/MinikinSkia.h
deleted file mode 100644
index 6eb9065..0000000
--- a/sample/MinikinSkia.h
+++ /dev/null

@@ -1,24 +0,0 @@
-namespace android {
-
-class MinikinFontSkia : public MinikinFont {
-public:
-    explicit MinikinFontSkia(SkTypeface *typeface);
-
-    ~MinikinFontSkia();
-
-    float GetHorizontalAdvance(uint32_t glyph_id,
-        const MinikinPaint &paint) const;
-
-    void GetBounds(MinikinRect* bounds, uint32_t glyph_id,
-        const MinikinPaint& paint) const;
-
-    const void* GetTable(uint32_t tag, size_t* size, MinikinDestroyFunc* destroy);
-
-    SkTypeface *GetSkTypeface();
-
-private:
-    SkTypeface *mTypeface;
-
-};
-
-}  // namespace android

diff --git a/sample/example.cpp b/sample/example.cpp
deleted file mode 100644
index f4c6a07..0000000
--- a/sample/example.cpp
+++ /dev/null

@@ -1,104 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// This is a test program that uses Minikin to layout and draw some text.
-// At the moment, it just draws a string into /data/local/tmp/foo.pgm.
-
-#include <stdio.h>
-#include <vector>
-#include <fstream>
-
-#include <unicode/unistr.h>
-#include <unicode/utf16.h>
-
-#include <minikin/MinikinFontFreeType.h>
-#include <minikin/Layout.h>
-
-using std::vector;
-using namespace android;
-using namespace minikin;
-
-FT_Library library;  // TODO: this should not be a global
-
-FontCollection *makeFontCollection() {
-    vector<FontFamily *>typefaces;
-    const char *fns[] = {
-        "/system/fonts/Roboto-Regular.ttf",
-        "/system/fonts/Roboto-Italic.ttf",
-        "/system/fonts/Roboto-BoldItalic.ttf",
-        "/system/fonts/Roboto-Light.ttf",
-        "/system/fonts/Roboto-Thin.ttf",
-        "/system/fonts/Roboto-Bold.ttf",
-        "/system/fonts/Roboto-ThinItalic.ttf",
-        "/system/fonts/Roboto-LightItalic.ttf"
-    };
-
-    FontFamily *family = new FontFamily();
-    FT_Face face;
-    FT_Error error;
-    for (size_t i = 0; i < sizeof(fns)/sizeof(fns[0]); i++) {
-        const char *fn = fns[i];
-        printf("adding %s\n", fn);
-        error = FT_New_Face(library, fn, 0, &face);
-        if (error != 0) {
-            printf("error loading %s, %d\n", fn, error);
-        }
-        MinikinFont *font = new MinikinFontFreeType(face);
-        family->addFont(font);
-    }
-    typefaces.push_back(family);
-
-#if 1
-    family = new FontFamily();
-    const char *fn = "/system/fonts/DroidSansDevanagari-Regular.ttf";
-    error = FT_New_Face(library, fn, 0, &face);
-    MinikinFont *font = new MinikinFontFreeType(face);
-    family->addFont(font);
-    typefaces.push_back(family);
-#endif
-
-    return new FontCollection(typefaces);
-}
-
-int runMinikinTest() {
-    FT_Error error = FT_Init_FreeType(&library);
-    if (error) {
-        return -1;
-    }
-    Layout::init();
-
-    FontCollection *collection = makeFontCollection();
-    Layout layout;
-    layout.setFontCollection(collection);
-    const char *text = "fine world \xe0\xa4\xa8\xe0\xa4\xae\xe0\xa4\xb8\xe0\xa5\x8d\xe0\xa4\xa4\xe0\xa5\x87";
-    int bidiFlags = 0;
-    FontStyle fontStyle;
-    MinikinPaint paint;
-    paint.size = 32;
-    icu::UnicodeString icuText = icu::UnicodeString::fromUTF8(text);
-    layout.doLayout(icuText.getBuffer(), 0, icuText.length(), icuText.length(), bidiFlags, fontStyle, paint);
-    layout.dump();
-    Bitmap bitmap(250, 50);
-    layout.draw(&bitmap, 10, 40, 32);
-    std::ofstream o;
-    o.open("/data/local/tmp/foo.pgm", std::ios::out | std::ios::binary);
-    bitmap.writePnm(o);
-    return 0;
-}
-
-int main() {
-    return runMinikinTest();
-}

diff --git a/sample/example_skia.cpp b/sample/example_skia.cpp
deleted file mode 100644
index f892b8c..0000000
--- a/sample/example_skia.cpp
+++ /dev/null

@@ -1,152 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// This is a test program that uses Minikin to layout and draw some text.
-// At the moment, it just draws a string into /data/local/tmp/foo.pgm.
-
-#include <stdio.h>
-#include <vector>
-#include <fstream>
-
-#include <unicode/unistr.h>
-#include <unicode/utf16.h>
-
-#include <minikin/MinikinFontFreeType.h>
-#include <minikin/Layout.h>
-
-#include <SkCanvas.h>
-#include <SkGraphics.h>
-#include <SkImageEncoder.h>
-#include <SkTypeface.h>
-#include <SkPaint.h>
-
-#include "MinikinSkia.h"
-
-using std::vector;
-
-namespace android {
-
-FT_Library library;  // TODO: this should not be a global
-
-FontCollection *makeFontCollection() {
-    vector<FontFamily *>typefaces;
-    const char *fns[] = {
-        "/system/fonts/Roboto-Regular.ttf",
-        "/system/fonts/Roboto-Italic.ttf",
-        "/system/fonts/Roboto-BoldItalic.ttf",
-        "/system/fonts/Roboto-Light.ttf",
-        "/system/fonts/Roboto-Thin.ttf",
-        "/system/fonts/Roboto-Bold.ttf",
-        "/system/fonts/Roboto-ThinItalic.ttf",
-        "/system/fonts/Roboto-LightItalic.ttf"
-    };
-
-    FontFamily *family = new FontFamily();
-    for (size_t i = 0; i < sizeof(fns)/sizeof(fns[0]); i++) {
-        const char *fn = fns[i];
-        SkTypeface *skFace = SkTypeface::CreateFromFile(fn);
-        MinikinFont *font = new MinikinFontSkia(skFace);
-        family->addFont(font);
-    }
-    typefaces.push_back(family);
-
-#if 1
-    family = new FontFamily();
-    const char *fn = "/system/fonts/DroidSansDevanagari-Regular.ttf";
-    SkTypeface *skFace = SkTypeface::CreateFromFile(fn);
-    MinikinFont *font = new MinikinFontSkia(skFace);
-    family->addFont(font);
-    typefaces.push_back(family);
-#endif
-
-    return new FontCollection(typefaces);
-}
-
-// Maybe move to MinikinSkia (esp. instead of opening GetSkTypeface publicly)?
-
-void drawToSkia(SkCanvas *canvas, SkPaint *paint, Layout *layout, float x, float y) {
-    size_t nGlyphs = layout->nGlyphs();
-    uint16_t *glyphs = new uint16_t[nGlyphs];
-    SkPoint *pos = new SkPoint[nGlyphs];
-    SkTypeface *lastFace = NULL;
-    SkTypeface *skFace = NULL;
-    size_t start = 0;
-
-    paint->setTextEncoding(SkPaint::kGlyphID_TextEncoding);
-    for (size_t i = 0; i < nGlyphs; i++) {
-        MinikinFontSkia *mfs = static_cast<MinikinFontSkia *>(layout->getFont(i));
-        skFace = mfs->GetSkTypeface();
-        glyphs[i] = layout->getGlyphId(i);
-        pos[i].fX = x + layout->getX(i);
-        pos[i].fY = y + layout->getY(i);
-        if (i > 0 && skFace != lastFace) {
-            paint->setTypeface(lastFace);
-            canvas->drawPosText(glyphs + start, (i - start) << 1, pos + start, *paint);
-            start = i;
-        }
-        lastFace = skFace;
-    }
-    paint->setTypeface(skFace);
-    canvas->drawPosText(glyphs + start, (nGlyphs - start) << 1, pos + start, *paint);
-    delete[] glyphs;
-    delete[] pos;
-}
-
-int runMinikinTest() {
-    FT_Error error = FT_Init_FreeType(&library);
-    if (error) {
-        return -1;
-    }
-    Layout::init();
-
-    FontCollection *collection = makeFontCollection();
-    Layout layout;
-    layout.setFontCollection(collection);
-    const char *text = "fine world \xe0\xa4\xa8\xe0\xa4\xae\xe0\xa4\xb8\xe0\xa5\x8d\xe0\xa4\xa4\xe0\xa5\x87";
-    int bidiFlags = 0;
-    FontStyle fontStyle(7);
-    MinikinPaint minikinPaint;
-    minikinPaint.size = 32;
-    icu::UnicodeString icuText = icu::UnicodeString::fromUTF8(text);
-    layout.doLayout(icuText.getBuffer(), 0, icuText.length(), icuText.length(), bidiFlags, fontStyle, minikinPaint);
-    layout.dump();
-
-    SkAutoGraphics ag;
-
-    int width = 800;
-    int height = 600;
-    SkBitmap bitmap;
-    bitmap.allocN32Pixels(width, height);
-    SkCanvas canvas(bitmap);
-    SkPaint paint;
-    paint.setARGB(255, 0, 0, 128);
-    paint.setStyle(SkPaint::kStroke_Style);
-    paint.setStrokeWidth(2);
-    paint.setTextSize(100);
-    paint.setAntiAlias(true);
-    canvas.drawLine(10, 300, 10 + layout.getAdvance(), 300, paint);
-    paint.setStyle(SkPaint::kFill_Style);
-    drawToSkia(&canvas, &paint, &layout, 10, 300);
-
-    SkImageEncoder::EncodeFile("/data/local/tmp/foo.png", bitmap, SkImageEncoder::kPNG_Type, 100);
-    return 0;
-}
-
-}
-
-int main(int argc, const char** argv) {
-    return android::runMinikinTest();
-}

diff --git a/tests/FontCollectionTest.cpp b/tests/FontCollectionTest.cpp
deleted file mode 100644
index fa95242..0000000
--- a/tests/FontCollectionTest.cpp
+++ /dev/null

@@ -1,124 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include <minikin/FontCollection.h>
-#include "FontTestUtils.h"
-#include "MinikinFontForTest.h"
-#include "MinikinInternal.h"
-
-namespace android {
-
-// The test font has following glyphs.
-// U+82A6
-// U+82A6 U+FE00 (VS1)
-// U+82A6 U+E0100 (VS17)
-// U+82A6 U+E0101 (VS18)
-// U+82A6 U+E0102 (VS19)
-// U+845B
-// U+845B U+FE01 (VS2)
-// U+845B U+E0101 (VS18)
-// U+845B U+E0102 (VS19)
-// U+845B U+E0103 (VS20)
-// U+537F
-// U+717D U+FE02 (VS3)
-// U+717D U+E0102 (VS19)
-// U+717D U+E0103 (VS20)
-const char kVsTestFont[] = kTestFontDir "/VarioationSelectorTest-Regular.ttf";
-
-void expectVSGlyphs(const FontCollection& fc, uint32_t codepoint, const std::set<uint32_t>& vsSet) {
-    for (uint32_t vs = 0xFE00; vs <= 0xE01EF; ++vs) {
-        // Move to variation selectors supplements after variation selectors.
-        if (vs == 0xFF00) {
-            vs = 0xE0100;
-        }
-        if (vsSet.find(vs) == vsSet.end()) {
-            EXPECT_FALSE(fc.hasVariationSelector(codepoint, vs))
-                << "Glyph for U+" << std::hex << codepoint << " U+" << vs;
-        } else {
-            EXPECT_TRUE(fc.hasVariationSelector(codepoint, vs))
-                << "Glyph for U+" << std::hex << codepoint << " U+" << vs;
-        }
-    }
-}
-
-TEST(FontCollectionTest, hasVariationSelectorTest) {
-  FontFamily* family = new FontFamily();
-  family->addFont(new MinikinFontForTest(kVsTestFont));
-  std::vector<FontFamily*> families({family});
-  FontCollection fc(families);
-  family->Unref();
-
-  EXPECT_FALSE(fc.hasVariationSelector(0x82A6, 0));
-  expectVSGlyphs(fc, 0x82A6, std::set<uint32_t>({0xFE00, 0xE0100, 0xE0101, 0xE0102}));
-
-  EXPECT_FALSE(fc.hasVariationSelector(0x845B, 0));
-  expectVSGlyphs(fc, 0x845B, std::set<uint32_t>({0xFE01, 0xE0101, 0xE0102, 0xE0103}));
-
-  EXPECT_FALSE(fc.hasVariationSelector(0x537F, 0));
-  expectVSGlyphs(fc, 0x537F, std::set<uint32_t>({}));
-
-  EXPECT_FALSE(fc.hasVariationSelector(0x717D, 0));
-  expectVSGlyphs(fc, 0x717D, std::set<uint32_t>({0xFE02, 0xE0102, 0xE0103}));
-}
-
-const char kEmojiXmlFile[] = kTestFontDir "emoji.xml";
-
-TEST(FontCollectionTest, hasVariationSelectorTest_emoji) {
-    MinikinAutoUnref<FontCollection> collection(getFontCollection(kTestFontDir, kEmojiXmlFile));
-
-    // Both text/color font have cmap format 14 subtable entry for VS15/VS16 respectively.
-    EXPECT_TRUE(collection->hasVariationSelector(0x2623, 0xFE0E));
-    EXPECT_TRUE(collection->hasVariationSelector(0x2623, 0xFE0F));
-
-    // The text font has cmap format 14 subtable entry for VS15 but the color font doesn't have for
-    // VS16
-    EXPECT_TRUE(collection->hasVariationSelector(0x2626, 0xFE0E));
-    EXPECT_FALSE(collection->hasVariationSelector(0x2626, 0xFE0F));
-
-    // The color font has cmap format 14 subtable entry for VS16 but the text font doesn't have for
-    // VS15.
-    EXPECT_TRUE(collection->hasVariationSelector(0x262A, 0xFE0E));
-    EXPECT_TRUE(collection->hasVariationSelector(0x262A, 0xFE0F));
-
-    // Neither text/color font have cmap format 14 subtable entry for VS15/VS16.
-    EXPECT_TRUE(collection->hasVariationSelector(0x262E, 0xFE0E));
-    EXPECT_FALSE(collection->hasVariationSelector(0x262E, 0xFE0F));
-
-    // Text font doesn't have U+262F U+FE0E or even its base code point U+262F.
-    EXPECT_FALSE(collection->hasVariationSelector(0x262F, 0xFE0E));
-
-    // VS15/VS16 is only for emoji, should return false for not an emoji code point.
-    EXPECT_FALSE(collection->hasVariationSelector(0x2229, 0xFE0E));
-    EXPECT_FALSE(collection->hasVariationSelector(0x2229, 0xFE0F));
-
-}
-
-TEST(FontCollectionTest, newEmojiTest) {
-    MinikinAutoUnref<FontCollection> collection(getFontCollection(kTestFontDir, kEmojiXmlFile));
-
-    // U+2695, U+2640, U+2642 are not in emoji catrgory in Unicode 9 but they are now in emoji
-    // category. Should return true even if U+FE0E was appended.
-    // These three emojis are only avalilable in TextEmoji.ttf but U+2695 is excluded here since it
-    // is used in other tests.
-    EXPECT_TRUE(collection->hasVariationSelector(0x2640, 0xFE0E));
-    EXPECT_FALSE(collection->hasVariationSelector(0x2640, 0xFE0F));
-    EXPECT_TRUE(collection->hasVariationSelector(0x2642, 0xFE0E));
-    EXPECT_FALSE(collection->hasVariationSelector(0x2642, 0xFE0F));
-}
-
-}  // namespace android

diff --git a/tests/FontFamilyTest.cpp b/tests/FontFamilyTest.cpp
deleted file mode 100644
index 1b24576..0000000
--- a/tests/FontFamilyTest.cpp
+++ /dev/null

@@ -1,416 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include <minikin/FontFamily.h>
-
-#include <cutils/log.h>
-
-#include "FontLanguageListCache.h"
-#include "ICUTestBase.h"
-#include "MinikinFontForTest.h"
-#include "MinikinInternal.h"
-
-namespace android {
-
-typedef ICUTestBase FontLanguagesTest;
-typedef ICUTestBase FontLanguageTest;
-
-static const FontLanguages& createFontLanguages(const std::string& input) {
-    AutoMutex _l(gMinikinLock);
-    uint32_t langId = FontLanguageListCache::getId(input);
-    return FontLanguageListCache::getById(langId);
-}
-
-static FontLanguage createFontLanguage(const std::string& input) {
-    AutoMutex _l(gMinikinLock);
-    uint32_t langId = FontLanguageListCache::getId(input);
-    return FontLanguageListCache::getById(langId)[0];
-}
-
-TEST_F(FontLanguageTest, basicTests) {
-    FontLanguage defaultLang;
-    FontLanguage emptyLang("", 0);
-    FontLanguage english = createFontLanguage("en");
-    FontLanguage french = createFontLanguage("fr");
-    FontLanguage und = createFontLanguage("und");
-    FontLanguage undZsye = createFontLanguage("und-Zsye");
-
-    EXPECT_EQ(english, english);
-    EXPECT_EQ(french, french);
-
-    EXPECT_TRUE(defaultLang != defaultLang);
-    EXPECT_TRUE(emptyLang != emptyLang);
-    EXPECT_TRUE(defaultLang != emptyLang);
-    EXPECT_TRUE(defaultLang != und);
-    EXPECT_TRUE(emptyLang != und);
-    EXPECT_TRUE(english != defaultLang);
-    EXPECT_TRUE(english != emptyLang);
-    EXPECT_TRUE(english != french);
-    EXPECT_TRUE(english != undZsye);
-    EXPECT_TRUE(und != undZsye);
-    EXPECT_TRUE(english != und);
-
-    EXPECT_TRUE(defaultLang.isUnsupported());
-    EXPECT_TRUE(emptyLang.isUnsupported());
-
-    EXPECT_FALSE(english.isUnsupported());
-    EXPECT_FALSE(french.isUnsupported());
-    EXPECT_FALSE(und.isUnsupported());
-    EXPECT_FALSE(undZsye.isUnsupported());
-}
-
-TEST_F(FontLanguageTest, getStringTest) {
-    EXPECT_EQ("en-Latn", createFontLanguage("en").getString());
-    EXPECT_EQ("en-Latn", createFontLanguage("en-Latn").getString());
-
-    // Capitalized language code or lowercased script should be normalized.
-    EXPECT_EQ("en-Latn", createFontLanguage("EN-LATN").getString());
-    EXPECT_EQ("en-Latn", createFontLanguage("EN-latn").getString());
-    EXPECT_EQ("en-Latn", createFontLanguage("en-latn").getString());
-
-    // Invalid script should be kept.
-    EXPECT_EQ("en-Xyzt", createFontLanguage("en-xyzt").getString());
-
-    EXPECT_EQ("en-Latn", createFontLanguage("en-Latn-US").getString());
-    EXPECT_EQ("ja-Jpan", createFontLanguage("ja").getString());
-    EXPECT_EQ("und", createFontLanguage("und").getString());
-    EXPECT_EQ("und", createFontLanguage("UND").getString());
-    EXPECT_EQ("und", createFontLanguage("Und").getString());
-    EXPECT_EQ("und-Zsye", createFontLanguage("und-Zsye").getString());
-    EXPECT_EQ("und-Zsye", createFontLanguage("Und-ZSYE").getString());
-    EXPECT_EQ("und-Zsye", createFontLanguage("Und-zsye").getString());
-
-    EXPECT_EQ("de-Latn", createFontLanguage("de-1901").getString());
-
-    // This is not a necessary desired behavior, just known behavior.
-    EXPECT_EQ("en-Latn", createFontLanguage("und-Abcdefgh").getString());
-}
-
-TEST_F(FontLanguageTest, ScriptEqualTest) {
-    EXPECT_TRUE(createFontLanguage("en").isEqualScript(createFontLanguage("en")));
-    EXPECT_TRUE(createFontLanguage("en-Latn").isEqualScript(createFontLanguage("en")));
-    EXPECT_TRUE(createFontLanguage("jp-Latn").isEqualScript(createFontLanguage("en-Latn")));
-    EXPECT_TRUE(createFontLanguage("en-Jpan").isEqualScript(createFontLanguage("en-Jpan")));
-
-    EXPECT_FALSE(createFontLanguage("en-Jpan").isEqualScript(createFontLanguage("en-Hira")));
-    EXPECT_FALSE(createFontLanguage("en-Jpan").isEqualScript(createFontLanguage("en-Hani")));
-}
-
-TEST_F(FontLanguageTest, ScriptMatchTest) {
-    const bool SUPPORTED = true;
-    const bool NOT_SUPPORTED = false;
-
-    struct TestCase {
-        const std::string baseScript;
-        const std::string requestedScript;
-        bool isSupported;
-    } testCases[] = {
-        // Same scripts
-        { "en-Latn", "Latn", SUPPORTED },
-        { "ja-Jpan", "Jpan", SUPPORTED },
-        { "ja-Hira", "Hira", SUPPORTED },
-        { "ja-Kana", "Kana", SUPPORTED },
-        { "ja-Hrkt", "Hrkt", SUPPORTED },
-        { "zh-Hans", "Hans", SUPPORTED },
-        { "zh-Hant", "Hant", SUPPORTED },
-        { "zh-Hani", "Hani", SUPPORTED },
-        { "ko-Kore", "Kore", SUPPORTED },
-        { "ko-Hang", "Hang", SUPPORTED },
-        { "zh-Hanb", "Hanb", SUPPORTED },
-
-        // Japanese supports Hiragana, Katakanara, etc.
-        { "ja-Jpan", "Hira", SUPPORTED },
-        { "ja-Jpan", "Kana", SUPPORTED },
-        { "ja-Jpan", "Hrkt", SUPPORTED },
-        { "ja-Hrkt", "Hira", SUPPORTED },
-        { "ja-Hrkt", "Kana", SUPPORTED },
-
-        // Chinese supports Han.
-        { "zh-Hans", "Hani", SUPPORTED },
-        { "zh-Hant", "Hani", SUPPORTED },
-        { "zh-Hanb", "Hani", SUPPORTED },
-
-        // Hanb supports Bopomofo.
-        { "zh-Hanb", "Bopo", SUPPORTED },
-
-        // Korean supports Hangul.
-        { "ko-Kore", "Hang", SUPPORTED },
-
-        // Different scripts
-        { "ja-Jpan", "Latn", NOT_SUPPORTED },
-        { "en-Latn", "Jpan", NOT_SUPPORTED },
-        { "ja-Jpan", "Hant", NOT_SUPPORTED },
-        { "zh-Hant", "Jpan", NOT_SUPPORTED },
-        { "ja-Jpan", "Hans", NOT_SUPPORTED },
-        { "zh-Hans", "Jpan", NOT_SUPPORTED },
-        { "ja-Jpan", "Kore", NOT_SUPPORTED },
-        { "ko-Kore", "Jpan", NOT_SUPPORTED },
-        { "zh-Hans", "Hant", NOT_SUPPORTED },
-        { "zh-Hant", "Hans", NOT_SUPPORTED },
-        { "zh-Hans", "Kore", NOT_SUPPORTED },
-        { "ko-Kore", "Hans", NOT_SUPPORTED },
-        { "zh-Hant", "Kore", NOT_SUPPORTED },
-        { "ko-Kore", "Hant", NOT_SUPPORTED },
-
-        // Hiragana doesn't support Japanese, etc.
-        { "ja-Hira", "Jpan", NOT_SUPPORTED },
-        { "ja-Kana", "Jpan", NOT_SUPPORTED },
-        { "ja-Hrkt", "Jpan", NOT_SUPPORTED },
-        { "ja-Hani", "Jpan", NOT_SUPPORTED },
-        { "ja-Hira", "Hrkt", NOT_SUPPORTED },
-        { "ja-Kana", "Hrkt", NOT_SUPPORTED },
-        { "ja-Hani", "Hrkt", NOT_SUPPORTED },
-        { "ja-Hani", "Hira", NOT_SUPPORTED },
-        { "ja-Hani", "Kana", NOT_SUPPORTED },
-
-        // Kanji doesn't support Chinese, etc.
-        { "zh-Hani", "Hant", NOT_SUPPORTED },
-        { "zh-Hani", "Hans", NOT_SUPPORTED },
-        { "zh-Hani", "Hanb", NOT_SUPPORTED },
-
-        // Hangul doesn't support Korean, etc.
-        { "ko-Hang", "Kore", NOT_SUPPORTED },
-        { "ko-Hani", "Kore", NOT_SUPPORTED },
-        { "ko-Hani", "Hang", NOT_SUPPORTED },
-        { "ko-Hang", "Hani", NOT_SUPPORTED },
-
-        // Han with botomofo doesn't support simplified Chinese, etc.
-        { "zh-Hanb", "Hant", NOT_SUPPORTED },
-        { "zh-Hanb", "Hans", NOT_SUPPORTED },
-        { "zh-Hanb", "Jpan", NOT_SUPPORTED },
-        { "zh-Hanb", "Kore", NOT_SUPPORTED },
-    };
-
-    for (auto testCase : testCases) {
-        hb_script_t script = hb_script_from_iso15924_tag(
-                HB_TAG(testCase.requestedScript[0], testCase.requestedScript[1],
-                       testCase.requestedScript[2], testCase.requestedScript[3]));
-        if (testCase.isSupported) {
-            EXPECT_TRUE(
-                    createFontLanguage(testCase.baseScript).supportsHbScript(script))
-                    << testCase.baseScript << " should support " << testCase.requestedScript;
-        } else {
-            EXPECT_FALSE(
-                    createFontLanguage(testCase.baseScript).supportsHbScript(script))
-                    << testCase.baseScript << " shouldn't support " << testCase.requestedScript;
-        }
-    }
-}
-
-TEST_F(FontLanguagesTest, basicTests) {
-    FontLanguages emptyLangs;
-    EXPECT_EQ(0u, emptyLangs.size());
-
-    FontLanguage english = createFontLanguage("en");
-    const FontLanguages& singletonLangs = createFontLanguages("en");
-    EXPECT_EQ(1u, singletonLangs.size());
-    EXPECT_EQ(english, singletonLangs[0]);
-
-    FontLanguage french = createFontLanguage("fr");
-    const FontLanguages& twoLangs = createFontLanguages("en,fr");
-    EXPECT_EQ(2u, twoLangs.size());
-    EXPECT_EQ(english, twoLangs[0]);
-    EXPECT_EQ(french, twoLangs[1]);
-}
-
-TEST_F(FontLanguagesTest, unsupportedLanguageTests) {
-    const FontLanguages& oneUnsupported = createFontLanguages("abcd-example");
-    EXPECT_TRUE(oneUnsupported.empty());
-
-    const FontLanguages& twoUnsupporteds = createFontLanguages("abcd-example,abcd-example");
-    EXPECT_TRUE(twoUnsupporteds.empty());
-
-    FontLanguage english = createFontLanguage("en");
-    const FontLanguages& firstUnsupported = createFontLanguages("abcd-example,en");
-    EXPECT_EQ(1u, firstUnsupported.size());
-    EXPECT_EQ(english, firstUnsupported[0]);
-
-    const FontLanguages& lastUnsupported = createFontLanguages("en,abcd-example");
-    EXPECT_EQ(1u, lastUnsupported.size());
-    EXPECT_EQ(english, lastUnsupported[0]);
-}
-
-TEST_F(FontLanguagesTest, repeatedLanguageTests) {
-    FontLanguage english = createFontLanguage("en");
-    FontLanguage french = createFontLanguage("fr");
-    FontLanguage englishInLatn = createFontLanguage("en-Latn");
-    ASSERT_TRUE(english == englishInLatn);
-
-    const FontLanguages& langs = createFontLanguages("en,en-Latn");
-    EXPECT_EQ(1u, langs.size());
-    EXPECT_EQ(english, langs[0]);
-
-    // Country codes are ignored.
-    const FontLanguages& fr = createFontLanguages("fr,fr-CA,fr-FR");
-    EXPECT_EQ(1u, fr.size());
-    EXPECT_EQ(french, fr[0]);
-
-    // The order should be kept.
-    const FontLanguages& langs2 = createFontLanguages("en,fr,en-Latn");
-    EXPECT_EQ(2u, langs2.size());
-    EXPECT_EQ(english, langs2[0]);
-    EXPECT_EQ(french, langs2[1]);
-}
-
-TEST_F(FontLanguagesTest, undEmojiTests) {
-    FontLanguage emoji = createFontLanguage("und-Zsye");
-    EXPECT_TRUE(emoji.hasEmojiFlag());
-
-    FontLanguage und = createFontLanguage("und");
-    EXPECT_FALSE(und.hasEmojiFlag());
-    EXPECT_FALSE(emoji == und);
-
-    FontLanguage undExample = createFontLanguage("und-example");
-    EXPECT_FALSE(undExample.hasEmojiFlag());
-    EXPECT_FALSE(emoji == undExample);
-}
-
-TEST_F(FontLanguagesTest, registerLanguageListTest) {
-    EXPECT_EQ(0UL, FontStyle::registerLanguageList(""));
-    EXPECT_NE(0UL, FontStyle::registerLanguageList("en"));
-    EXPECT_NE(0UL, FontStyle::registerLanguageList("jp"));
-    EXPECT_NE(0UL, FontStyle::registerLanguageList("en,zh-Hans"));
-
-    EXPECT_EQ(FontStyle::registerLanguageList("en"), FontStyle::registerLanguageList("en"));
-    EXPECT_NE(FontStyle::registerLanguageList("en"), FontStyle::registerLanguageList("jp"));
-
-    EXPECT_EQ(FontStyle::registerLanguageList("en,zh-Hans"),
-              FontStyle::registerLanguageList("en,zh-Hans"));
-    EXPECT_NE(FontStyle::registerLanguageList("en,zh-Hans"),
-              FontStyle::registerLanguageList("zh-Hans,en"));
-    EXPECT_NE(FontStyle::registerLanguageList("en,zh-Hans"),
-              FontStyle::registerLanguageList("jp"));
-    EXPECT_NE(FontStyle::registerLanguageList("en,zh-Hans"),
-              FontStyle::registerLanguageList("en"));
-    EXPECT_NE(FontStyle::registerLanguageList("en,zh-Hans"),
-              FontStyle::registerLanguageList("en,zh-Hant"));
-}
-
-// The test font has following glyphs.
-// U+82A6
-// U+82A6 U+FE00 (VS1)
-// U+82A6 U+E0100 (VS17)
-// U+82A6 U+E0101 (VS18)
-// U+82A6 U+E0102 (VS19)
-// U+845B
-// U+845B U+FE00 (VS2)
-// U+845B U+E0101 (VS18)
-// U+845B U+E0102 (VS19)
-// U+845B U+E0103 (VS20)
-// U+537F
-// U+717D U+FE02 (VS3)
-// U+717D U+E0102 (VS19)
-// U+717D U+E0103 (VS20)
-const char kVsTestFont[] = kTestFontDir "VarioationSelectorTest-Regular.ttf";
-
-class FontFamilyTest : public ICUTestBase {
-public:
-    virtual void SetUp() override {
-        ICUTestBase::SetUp();
-        if (access(kVsTestFont, R_OK) != 0) {
-            FAIL() << "Unable to read " << kVsTestFont << ". "
-                   << "Please prepare the test data directory. "
-                   << "For more details, please see how_to_run.txt.";
-        }
-    }
-};
-
-// Asserts that the font family has glyphs for and only for specified codepoint
-// and variationSelector pairs.
-void expectVSGlyphs(FontFamily* family, uint32_t codepoint, const std::set<uint32_t>& vs) {
-    for (uint32_t i = 0xFE00; i <= 0xE01EF; ++i) {
-        // Move to variation selectors supplements after variation selectors.
-        if (i == 0xFF00) {
-            i = 0xE0100;
-        }
-        if (vs.find(i) == vs.end()) {
-            EXPECT_FALSE(family->hasGlyph(codepoint, i))
-                    << "Glyph for U+" << std::hex << codepoint << " U+" << i;
-        } else {
-            EXPECT_TRUE(family->hasGlyph(codepoint, i))
-                    << "Glyph for U+" << std::hex << codepoint << " U+" << i;
-        }
-
-    }
-}
-
-TEST_F(FontFamilyTest, hasVariationSelectorTest) {
-    MinikinAutoUnref<MinikinFontForTest> minikinFont(new MinikinFontForTest(kVsTestFont));
-    MinikinAutoUnref<FontFamily> family(new FontFamily);
-    family->addFont(minikinFont.get());
-
-    AutoMutex _l(gMinikinLock);
-
-    const uint32_t kVS1 = 0xFE00;
-    const uint32_t kVS2 = 0xFE01;
-    const uint32_t kVS3 = 0xFE02;
-    const uint32_t kVS17 = 0xE0100;
-    const uint32_t kVS18 = 0xE0101;
-    const uint32_t kVS19 = 0xE0102;
-    const uint32_t kVS20 = 0xE0103;
-
-    const uint32_t kSupportedChar1 = 0x82A6;
-    EXPECT_TRUE(family->getCoverage()->get(kSupportedChar1));
-    expectVSGlyphs(family.get(), kSupportedChar1, std::set<uint32_t>({kVS1, kVS17, kVS18, kVS19}));
-
-    const uint32_t kSupportedChar2 = 0x845B;
-    EXPECT_TRUE(family->getCoverage()->get(kSupportedChar2));
-    expectVSGlyphs(family.get(), kSupportedChar2, std::set<uint32_t>({kVS2, kVS18, kVS19, kVS20}));
-
-    const uint32_t kNoVsSupportedChar = 0x537F;
-    EXPECT_TRUE(family->getCoverage()->get(kNoVsSupportedChar));
-    expectVSGlyphs(family.get(), kNoVsSupportedChar, std::set<uint32_t>());
-
-    const uint32_t kVsOnlySupportedChar = 0x717D;
-    EXPECT_FALSE(family->getCoverage()->get(kVsOnlySupportedChar));
-    expectVSGlyphs(family.get(), kVsOnlySupportedChar, std::set<uint32_t>({kVS3, kVS19, kVS20}));
-
-    const uint32_t kNotSupportedChar = 0x845C;
-    EXPECT_FALSE(family->getCoverage()->get(kNotSupportedChar));
-    expectVSGlyphs(family.get(), kNotSupportedChar, std::set<uint32_t>());
-}
-
-TEST_F(FontFamilyTest, hasVSTableTest) {
-    struct TestCase {
-        const std::string fontPath;
-        bool hasVSTable;
-    } testCases[] = {
-        { kTestFontDir "Ja.ttf", true },
-        { kTestFontDir "ZhHant.ttf", true },
-        { kTestFontDir "ZhHans.ttf", true },
-        { kTestFontDir "Italic.ttf", false },
-        { kTestFontDir "Bold.ttf", false },
-        { kTestFontDir "BoldItalic.ttf", false },
-    };
-
-    for (auto testCase : testCases) {
-        SCOPED_TRACE(testCase.hasVSTable ?
-                "Font " + testCase.fontPath + " should have a variation sequence table." :
-                "Font " + testCase.fontPath + " shouldn't have a variation sequence table.");
-
-        MinikinAutoUnref<MinikinFontForTest> minikinFont(new MinikinFontForTest(testCase.fontPath));
-        MinikinAutoUnref<FontFamily> family(new FontFamily);
-        family->addFont(minikinFont.get());
-        AutoMutex _l(gMinikinLock);
-        family->getCoverage();
-
-        EXPECT_EQ(testCase.hasVSTable, family->hasVSTable());
-    }
-}
-
-}  // namespace android

diff --git a/tests/FontTestUtils.cpp b/tests/FontTestUtils.cpp
deleted file mode 100644
index fdc3ed6..0000000
--- a/tests/FontTestUtils.cpp
+++ /dev/null

@@ -1,81 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <libxml/tree.h>
-
-#include <minikin/FontCollection.h>
-#include <minikin/FontFamily.h>
-
-#include <cutils/log.h>
-
-#include "FontLanguage.h"
-#include "MinikinFontForTest.h"
-
-android::FontCollection* getFontCollection(const char* fontDir, const char* fontXml) {
-    xmlDoc* doc = xmlReadFile(fontXml, NULL, 0);
-    xmlNode* familySet = xmlDocGetRootElement(doc);
-
-    std::vector<android::FontFamily*> families;
-    for (xmlNode* familyNode = familySet->children; familyNode; familyNode = familyNode->next) {
-        if (xmlStrcmp(familyNode->name, (const xmlChar*)"family") != 0) {
-            continue;
-        }
-
-        xmlChar* variantXmlch = xmlGetProp(familyNode, (const xmlChar*)"variant");
-        int variant = android::VARIANT_DEFAULT;
-        if (variantXmlch) {
-            if (xmlStrcmp(variantXmlch, (const xmlChar*)"elegant") == 0) {
-                variant = android::VARIANT_ELEGANT;
-            } else if (xmlStrcmp(variantXmlch, (const xmlChar*)"compact") == 0) {
-                variant = android::VARIANT_COMPACT;
-            }
-        }
-
-        xmlChar* lang = xmlGetProp(familyNode, (const xmlChar*)"lang");
-        uint32_t langId = android::FontStyle::registerLanguageList(
-                std::string((const char*)lang, xmlStrlen(lang)));
-
-        android::FontFamily* family = new android::FontFamily(langId, variant);
-
-        for (xmlNode* fontNode = familyNode->children; fontNode; fontNode = fontNode->next) {
-            if (xmlStrcmp(fontNode->name, (const xmlChar*)"font") != 0) {
-                continue;
-            }
-
-            int weight = atoi((const char*)(xmlGetProp(fontNode, (const xmlChar*)"weight"))) / 100;
-            bool italic = xmlStrcmp(
-                    xmlGetProp(fontNode, (const xmlChar*)"style"), (const xmlChar*)"italic") == 0;
-
-            xmlChar* fontFileName = xmlNodeListGetString(doc, fontNode->xmlChildrenNode, 1);
-            std::string fontPath = fontDir + std::string((const char*)fontFileName);
-            xmlFree(fontFileName);
-
-            LOG_ALWAYS_FATAL_IF(access(fontPath.c_str(), R_OK) != 0,
-                    "%s is not found", fontPath.c_str());
-
-            family->addFont(new MinikinFontForTest(fontPath), android::FontStyle(weight, italic));
-        }
-        families.push_back(family);
-    }
-    xmlFreeDoc(doc);
-
-    android::FontCollection* collection = new android::FontCollection(families);
-    collection->Ref();
-    for (size_t i = 0; i < families.size(); ++i) {
-        families[i]->Unref();
-    }
-    return collection;
-}

diff --git a/tests/GraphemeBreakTests.cpp b/tests/GraphemeBreakTests.cpp
deleted file mode 100644
index 9dfd426..0000000
--- a/tests/GraphemeBreakTests.cpp
+++ /dev/null

@@ -1,201 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-#include <UnicodeUtils.h>
-#include <minikin/GraphemeBreak.h>
-
-using namespace android;
-
-bool IsBreak(const char* src) {
-    const size_t BUF_SIZE = 256;
-    uint16_t buf[BUF_SIZE];
-    size_t offset;
-    size_t size;
-    ParseUnicode(buf, BUF_SIZE, src, &size, &offset);
-    return GraphemeBreak::isGraphemeBreak(buf, 0, size, offset);
-}
-
-TEST(GraphemeBreak, utf16) {
-    EXPECT_FALSE(IsBreak("U+D83C | U+DC31"));  // emoji, U+1F431
-
-    // tests for invalid UTF-16
-    EXPECT_TRUE(IsBreak("U+D800 | U+D800"));  // two leading surrogates
-    EXPECT_TRUE(IsBreak("U+DC00 | U+DC00"));  // two trailing surrogates
-    EXPECT_TRUE(IsBreak("'a' | U+D800"));  // lonely leading surrogate
-    EXPECT_TRUE(IsBreak("U+DC00 | 'a'"));  // lonely trailing surrogate
-    EXPECT_TRUE(IsBreak("U+D800 | 'a'"));  // leading surrogate followed by non-surrogate
-    EXPECT_TRUE(IsBreak("'a' | U+DC00"));  // non-surrogate followed by trailing surrogate
-}
-
-TEST(GraphemeBreak, rules) {
-    // Rule GB1, sot ÷; Rule GB2, ÷ eot
-    EXPECT_TRUE(IsBreak("| 'a'"));
-    EXPECT_TRUE(IsBreak("'a' |"));
-
-    // Rule GB3, CR x LF
-    EXPECT_FALSE(IsBreak("U+000D | U+000A"));  // CR x LF
-
-    // Rule GB4, (Control | CR | LF) ÷
-    EXPECT_TRUE(IsBreak("'a' | U+2028"));  // Line separator
-    EXPECT_TRUE(IsBreak("'a' | U+000D"));  // LF
-    EXPECT_TRUE(IsBreak("'a' | U+000A"));  // CR
-
-    // Rule GB5, ÷ (Control | CR | LF)
-    EXPECT_TRUE(IsBreak("U+2028 | 'a'"));  // Line separator
-    EXPECT_TRUE(IsBreak("U+000D | 'a'"));  // LF
-    EXPECT_TRUE(IsBreak("U+000A | 'a'"));  // CR
-
-    // Rule GB6, L x ( L | V | LV | LVT )
-    EXPECT_FALSE(IsBreak("U+1100 | U+1100"));  // L x L
-    EXPECT_FALSE(IsBreak("U+1100 | U+1161"));  // L x V
-    EXPECT_FALSE(IsBreak("U+1100 | U+AC00"));  // L x LV
-    EXPECT_FALSE(IsBreak("U+1100 | U+AC01"));  // L x LVT
-
-    // Rule GB7, ( LV | V ) x ( V | T )
-    EXPECT_FALSE(IsBreak("U+AC00 | U+1161"));  // LV x V
-    EXPECT_FALSE(IsBreak("U+1161 | U+1161"));  // V x V
-    EXPECT_FALSE(IsBreak("U+AC00 | U+11A8"));  // LV x T
-    EXPECT_FALSE(IsBreak("U+1161 | U+11A8"));  // V x T
-
-    // Rule GB8, ( LVT | T ) x T
-    EXPECT_FALSE(IsBreak("U+AC01 | U+11A8"));  // LVT x T
-    EXPECT_FALSE(IsBreak("U+11A8 | U+11A8"));  // T x T
-
-    // Other hangul pairs not counted above _are_ breaks (GB10)
-    EXPECT_TRUE(IsBreak("U+AC00 | U+1100"));  // LV x L
-    EXPECT_TRUE(IsBreak("U+AC01 | U+1100"));  // LVT x L
-    EXPECT_TRUE(IsBreak("U+11A8 | U+1100"));  // T x L
-    EXPECT_TRUE(IsBreak("U+11A8 | U+AC00"));  // T x LV
-    EXPECT_TRUE(IsBreak("U+11A8 | U+AC01"));  // T x LVT
-
-    // Rule GB8a, Regional_Indicator x Regional_Indicator
-    EXPECT_FALSE(IsBreak("U+1F1FA | U+1F1F8"));
-    EXPECT_TRUE(IsBreak("U+1F1FA U+1F1F8 | U+1F1FA U+1F1F8")); // Regional indicator pair (flag)
-    EXPECT_FALSE(IsBreak("U+1F1FA | U+1F1F8 U+1F1FA U+1F1F8")); // Regional indicator pair (flag)
-    EXPECT_FALSE(IsBreak("U+1F1FA U+1F1F8 U+1F1FA | U+1F1F8")); // Regional indicator pair (flag)
-
-    EXPECT_TRUE(IsBreak("U+1F1FA U+1F1F8 | U+1F1FA"));  // Regional indicator pair (flag)
-    EXPECT_FALSE(IsBreak("U+1F1FA | U+1F1F8 U+1F1FA"));  // Regional indicator pair (flag)
-
-    EXPECT_TRUE(IsBreak("'a' U+1F1FA U+1F1F8 | U+1F1FA"));  // Regional indicator pair (flag)
-    EXPECT_FALSE(IsBreak("'a' U+1F1FA | U+1F1F8 U+1F1FA"));  // Regional indicator pair (flag)
-
-    EXPECT_TRUE(
-            IsBreak("'a' U+1F1FA U+1F1F8 | U+1F1FA U+1F1F8"));  // Regional indicator pair (flag)
-    EXPECT_FALSE(
-            IsBreak("'a' U+1F1FA | U+1F1F8 U+1F1FA U+1F1F8"));  // Regional indicator pair (flag)
-    EXPECT_FALSE(
-            IsBreak("'a' U+1F1FA U+1F1F8 U+1F1FA | U+1F1F8"));  // Regional indicator pair (flag)
-
-    // Rule GB9, x Extend
-    EXPECT_FALSE(IsBreak("'a' | U+0301"));  // combining accent
-    // Rule GB9a, x SpacingMark
-    EXPECT_FALSE(IsBreak("U+0915 | U+093E"));  // KA, AA (spacing mark)
-    // Rule GB9b, Prepend x
-    // see tailoring test for prepend, as current ICU doesn't have any characters in the class
-
-    // Rule GB10, Any ÷ Any
-    EXPECT_TRUE(IsBreak("'a' | 'b'"));
-    EXPECT_TRUE(IsBreak("'f' | 'i'"));  // probable ligature
-    EXPECT_TRUE(IsBreak("U+0644 | U+0627"));  // probable ligature, lam + alef
-    EXPECT_TRUE(IsBreak("U+4E00 | U+4E00"));  // CJK ideographs
-    EXPECT_TRUE(IsBreak("'a' | U+1F1FA U+1F1F8"));  // Regional indicator pair (flag)
-    EXPECT_TRUE(IsBreak("U+1F1FA U+1F1F8 | 'a'"));  // Regional indicator pair (flag)
-}
-
-TEST(GraphemeBreak, tailoring) {
-    // control characters that we interpret as "extend"
-    EXPECT_FALSE(IsBreak("'a' | U+00AD"));  // soft hyphen
-    EXPECT_FALSE(IsBreak("'a' | U+200B"));  // zwsp
-    EXPECT_FALSE(IsBreak("'a' | U+200E"));  // lrm
-    EXPECT_FALSE(IsBreak("'a' | U+202A"));  // lre
-    EXPECT_FALSE(IsBreak("'a' | U+E0041"));  // tag character
-
-    // UTC-approved characters for the Prepend class
-    EXPECT_FALSE(IsBreak("U+06DD | U+0661"));  // arabic subtending mark + digit one
-
-    EXPECT_TRUE(IsBreak("U+0E01 | U+0E33"));  // Thai sara am
-
-    // virama is not a grapheme break, but "pure killer" is
-    EXPECT_FALSE(IsBreak("U+0915 | U+094D U+0915"));  // Devanagari ka+virama+ka
-    EXPECT_FALSE(IsBreak("U+0915 U+094D | U+0915"));  // Devanagari ka+virama+ka
-    EXPECT_FALSE(IsBreak("U+0E01 | U+0E3A U+0E01"));  // thai phinthu = pure killer
-    EXPECT_TRUE(IsBreak("U+0E01 U+0E3A | U+0E01"));  // thai phinthu = pure killer
-
-    // suppress grapheme breaks in zwj emoji sequences, see
-    // http://www.unicode.org/emoji/charts/emoji-zwj-sequences.html
-    EXPECT_FALSE(IsBreak("U+1F469 U+200D | U+2764 U+FE0F U+200D U+1F48B U+200D U+1F468"));
-    EXPECT_FALSE(IsBreak("U+1F469 U+200D U+2764 U+FE0F U+200D | U+1F48B U+200D U+1F468"));
-    EXPECT_FALSE(IsBreak("U+1F469 U+200D U+2764 U+FE0F U+200D U+1F48B U+200D | U+1F468"));
-    EXPECT_FALSE(IsBreak("U+1F468 U+200D | U+1F469 U+200D U+1F466"));
-    EXPECT_FALSE(IsBreak("U+1F468 U+200D U+1F469 U+200D | U+1F466"));
-    EXPECT_FALSE(IsBreak("U+1F469 U+200D | U+1F469 U+200D U+1F467 U+200D U+1F466"));
-    EXPECT_FALSE(IsBreak("U+1F469 U+200D U+1F469 U+200D | U+1F467 U+200D U+1F466"));
-    EXPECT_FALSE(IsBreak("U+1F469 U+200D U+1F469 U+200D U+1F467 U+200D | U+1F466"));
-    EXPECT_FALSE(IsBreak("U+1F441 U+200D | U+1F5E8"));
-
-    // Do not break before and after zwj with all kind of emoji characters.
-    EXPECT_FALSE(IsBreak("U+1F431 | U+200D U+1F464"));
-    EXPECT_FALSE(IsBreak("U+1F431 U+200D | U+1F464"));
-
-    // ARABIC LETTER BEH + ZWJ + heart, not a zwj emoji sequence, so we preserve the break
-    EXPECT_TRUE(IsBreak("U+0628 U+200D | U+2764"));
-}
-
-TEST(GraphemeBreak, emojiModifiers) {
-    EXPECT_FALSE(IsBreak("U+261D | U+1F3FB"));  // white up pointing index + modifier
-    EXPECT_FALSE(IsBreak("U+270C | U+1F3FB"));  // victory hand + modifier
-    EXPECT_FALSE(IsBreak("U+1F466 | U+1F3FB"));  // boy + modifier
-    EXPECT_FALSE(IsBreak("U+1F466 | U+1F3FC"));  // boy + modifier
-    EXPECT_FALSE(IsBreak("U+1F466 | U+1F3FD"));  // boy + modifier
-    EXPECT_FALSE(IsBreak("U+1F466 | U+1F3FE"));  // boy + modifier
-    EXPECT_FALSE(IsBreak("U+1F466 | U+1F3FF"));  // boy + modifier
-    EXPECT_FALSE(IsBreak("U+1F918 | U+1F3FF"));  // sign of the horns + modifier
-    EXPECT_FALSE(IsBreak("U+1F933 | U+1F3FF"));  // selfie (Unicode 9) + modifier
-
-    // adding emoji style variation selector doesn't affect grapheme cluster
-    EXPECT_TRUE(IsBreak("U+270C U+FE0E | U+1F3FB"));  // victory hand + text style + modifier
-    EXPECT_FALSE(IsBreak("U+270C U+FE0F | U+1F3FB"));  // heart + emoji style + modifier
-
-    // heart is not an emoji base
-    EXPECT_TRUE(IsBreak("U+2764 | U+1F3FB"));  // heart + modifier
-    EXPECT_TRUE(IsBreak("U+2764 U+FE0E | U+1F3FB"));  // heart + emoji style + modifier
-    EXPECT_TRUE(IsBreak("U+2764 U+FE0F | U+1F3FB"));  // heart + emoji style + modifier
-    EXPECT_TRUE(IsBreak("U+1F3FB | U+1F3FB"));  // modifier + modifier
-
-    // rat is not an emoji modifer
-    EXPECT_TRUE(IsBreak("U+1F466 | U+1F400"));  // boy + rat
-
-}
-
-TEST(GraphemeBreak, genderBalancedEmoji) {
-    // U+1F469 is WOMAN, U+200D is ZWJ, U+1F4BC is BRIEFCASE.
-    EXPECT_FALSE(IsBreak("U+1F469 | U+200D U+1F4BC"));
-    EXPECT_FALSE(IsBreak("U+1F469 U+200D | U+1F4BC"));
-
-    // U+2695 has now emoji property, so should be part of ZWJ sequence.
-    EXPECT_FALSE(IsBreak("U+1F469 | U+200D U+2695"));
-    EXPECT_FALSE(IsBreak("U+1F469 U+200D | U+2695"));
-}
-
-TEST(GraphemeBreak, offsets) {
-    uint16_t string[] = { 0x0041, 0x06DD, 0x0045, 0x0301, 0x0049, 0x0301 };
-    EXPECT_TRUE(GraphemeBreak::isGraphemeBreak(string, 2, 3, 2));
-    EXPECT_FALSE(GraphemeBreak::isGraphemeBreak(string, 2, 3, 3));
-    EXPECT_TRUE(GraphemeBreak::isGraphemeBreak(string, 2, 3, 4));
-    EXPECT_TRUE(GraphemeBreak::isGraphemeBreak(string, 2, 3, 5));
-}

diff --git a/tests/HbFontCacheTest.cpp b/tests/HbFontCacheTest.cpp
deleted file mode 100644
index 2dee61a..0000000
--- a/tests/HbFontCacheTest.cpp
+++ /dev/null

@@ -1,87 +0,0 @@
-/*
- * Copyright (C) 2016 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "HbFontCache.h"
-
-#include <cutils/log.h>
-#include <hb.h>
-#include <utils/Mutex.h>
-
-#include "MinikinInternal.h"
-#include "MinikinFontForTest.h"
-#include <minikin/MinikinFont.h>
-
-namespace android {
-namespace {
-
-class HbFontCacheTest : public testing::Test {
-public:
-    virtual void TearDown() {
-        AutoMutex _l(gMinikinLock);
-        purgeHbFontCacheLocked();
-    }
-};
-
-TEST_F(HbFontCacheTest, getHbFontLockedTest) {
-    AutoMutex _l(gMinikinLock);
-
-    MinikinFontForTest fontA(kTestFontDir "Regular.ttf");
-    MinikinFontForTest fontB(kTestFontDir "Bold.ttf");
-    MinikinFontForTest fontC(kTestFontDir "BoldItalic.ttf");
-
-    // Never return NULL.
-    EXPECT_NE(nullptr, getHbFontLocked(&fontA));
-    EXPECT_NE(nullptr, getHbFontLocked(&fontB));
-    EXPECT_NE(nullptr, getHbFontLocked(&fontC));
-
-    EXPECT_NE(nullptr, getHbFontLocked(nullptr));
-
-    // Must return same object if same font object is passed.
-    EXPECT_EQ(getHbFontLocked(&fontA), getHbFontLocked(&fontA));
-    EXPECT_EQ(getHbFontLocked(&fontB), getHbFontLocked(&fontB));
-    EXPECT_EQ(getHbFontLocked(&fontC), getHbFontLocked(&fontC));
-
-    // Different object must be returned if the passed minikinFont has different ID.
-    EXPECT_NE(getHbFontLocked(&fontA), getHbFontLocked(&fontB));
-    EXPECT_NE(getHbFontLocked(&fontA), getHbFontLocked(&fontC));
-}
-
-TEST_F(HbFontCacheTest, purgeCacheTest) {
-    AutoMutex _l(gMinikinLock);
-    MinikinFontForTest minikinFont(kTestFontDir "Regular.ttf");
-
-    hb_font_t* font = getHbFontLocked(&minikinFont);
-    ASSERT_NE(nullptr, font);
-
-    // Set user data to identify the font object.
-    hb_user_data_key_t key;
-    void* data = (void*)0xdeadbeef;
-    hb_font_set_user_data(font, &key, data, NULL, false);
-    ASSERT_EQ(data, hb_font_get_user_data(font, &key));
-
-    purgeHbFontCacheLocked();
-
-    // By checking user data, confirm that the object after purge is different from previously
-    // created one. Do not compare the returned pointer here since memory allocator may assign
-    // same region for new object.
-    font = getHbFontLocked(&minikinFont);
-    EXPECT_EQ(nullptr, hb_font_get_user_data(font, &key));
-}
-
-}  // namespace
-}  // namespace android

diff --git a/tests/MinikinFontForTest.cpp b/tests/MinikinFontForTest.cpp
deleted file mode 100644
index 66dd4ea..0000000
--- a/tests/MinikinFontForTest.cpp
+++ /dev/null

@@ -1,63 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "MinikinFontForTest.h"
-
-#include <minikin/MinikinFont.h>
-
-#include <SkTypeface.h>
-
-#include <cutils/log.h>
-
-MinikinFontForTest::MinikinFontForTest(const std::string& font_path) :
-    MinikinFontForTest(font_path, SkTypeface::CreateFromFile(font_path.c_str())) {
-}
-
-MinikinFontForTest::MinikinFontForTest(const std::string& font_path, SkTypeface* typeface) :
-    MinikinFont(typeface->uniqueID()),
-    mTypeface(typeface),
-    mFontPath(font_path) {
-}
-
-MinikinFontForTest::~MinikinFontForTest() {
-}
-
-float MinikinFontForTest::GetHorizontalAdvance(uint32_t /* glyph_id */,
-        const android::MinikinPaint& /* paint */) const {
-    LOG_ALWAYS_FATAL("MinikinFontForTest::GetHorizontalAdvance is not yet implemented");
-    return 0.0f;
-}
-
-void MinikinFontForTest::GetBounds(android::MinikinRect* /* bounds */, uint32_t /* glyph_id */,
-        const android::MinikinPaint& /* paint */) const {
-    LOG_ALWAYS_FATAL("MinikinFontForTest::GetBounds is not yet implemented");
-}
-
-const void* MinikinFontForTest::GetTable(uint32_t tag, size_t* size,
-        android::MinikinDestroyFunc* destroy) {
-    const size_t tableSize = mTypeface->getTableSize(tag);
-    *size = tableSize;
-    if (tableSize == 0) {
-        return nullptr;
-    }
-    void* buf = malloc(tableSize);
-    if (buf == nullptr) {
-        return nullptr;
-    }
-    mTypeface->getTableData(tag, 0, tableSize, buf);
-    *destroy = free;
-    return buf;
-}

diff --git a/tests/MinikinFontForTest.h b/tests/MinikinFontForTest.h
deleted file mode 100644
index e527d21..0000000
--- a/tests/MinikinFontForTest.h
+++ /dev/null

@@ -1,42 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef MINIKIN_TEST_MINIKIN_FONT_FOR_TEST_H
-#define MINIKIN_TEST_MINIKIN_FONT_FOR_TEST_H
-
-#include <minikin/MinikinFont.h>
-
-class SkTypeface;
-
-class MinikinFontForTest : public android::MinikinFont {
-public:
-    explicit MinikinFontForTest(const std::string& font_path);
-    MinikinFontForTest(const std::string& font_path, SkTypeface* typeface);
-    ~MinikinFontForTest();
-
-    // MinikinFont overrides.
-    float GetHorizontalAdvance(uint32_t glyph_id, const android::MinikinPaint &paint) const;
-    void GetBounds(android::MinikinRect* bounds, uint32_t glyph_id,
-            const android::MinikinPaint& paint) const;
-    const void* GetTable(uint32_t tag, size_t* size, android::MinikinDestroyFunc* destroy);
-
-    const std::string& fontPath() const { return mFontPath; }
-private:
-    SkTypeface *mTypeface;
-    const std::string mFontPath;
-};
-
-#endif  // MINIKIN_TEST_MINIKIN_FONT_FOR_TEST_H

diff --git a/tests/MinikinInternalTest.cpp b/tests/MinikinInternalTest.cpp
deleted file mode 100644
index 9c1a1e5..0000000
--- a/tests/MinikinInternalTest.cpp
+++ /dev/null

@@ -1,34 +0,0 @@
-/*
- * Copyright (C) 2016 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "MinikinInternal.h"
-
-namespace android {
-
-TEST(MinikinInternalTest, isEmojiTest) {
-    EXPECT_TRUE(isEmoji(0x0023));  // NUMBER SIGN
-    EXPECT_TRUE(isEmoji(0x0035));  // DIGIT FIVE
-    EXPECT_TRUE(isEmoji(0x1F0CF));  // PLAYING CARD BLACK JOKER
-    EXPECT_TRUE(isEmoji(0x1F1E9));  // REGIONAL INDICATOR SYMBOL LETTER D
-
-    EXPECT_FALSE(isEmoji(0x0000));  // <control>
-    EXPECT_FALSE(isEmoji(0x0061));  // LATIN SMALL LETTER A
-    EXPECT_FALSE(isEmoji(0x29E3D));  // A han character.
-}
-
-}  // namespace android

diff --git a/tests/UnicodeUtils.cpp b/tests/UnicodeUtils.cpp
deleted file mode 100644
index 501fc9f..0000000
--- a/tests/UnicodeUtils.cpp
+++ /dev/null

@@ -1,96 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-#include <unicode/utf.h>
-#include <cstdlib>
-
-// src is of the form "U+1F431 | 'h' 'i'". Position of "|" gets saved to offset if non-null.
-// Size is returned in an out parameter because gtest needs a void return for ASSERT to work.
-void ParseUnicode(uint16_t* buf, size_t buf_size, const char* src, size_t* result_size,
-        size_t* offset) {
-    size_t input_ix = 0;
-    size_t output_ix = 0;
-    bool seen_offset = false;
-
-    while (src[input_ix] != 0) {
-        switch (src[input_ix]) {
-        case '\'':
-            // single ASCII char
-            ASSERT_LT(src[input_ix], 0x80);
-            input_ix++;
-            ASSERT_NE(src[input_ix], 0);
-            ASSERT_LT(output_ix, buf_size);
-            buf[output_ix++] = (uint16_t)src[input_ix++];
-            ASSERT_EQ(src[input_ix], '\'');
-            input_ix++;
-            break;
-        case 'u':
-        case 'U': {
-            // Unicode codepoint in hex syntax
-            input_ix++;
-            ASSERT_EQ(src[input_ix], '+');
-            input_ix++;
-            char* endptr = (char*)src + input_ix;
-            unsigned long int codepoint = strtoul(src + input_ix, &endptr, 16);
-            size_t num_hex_digits = endptr - (src + input_ix);
-            ASSERT_GE(num_hex_digits, 4u);  // also triggers on invalid number syntax, digits = 0
-            ASSERT_LE(num_hex_digits, 6u);
-            ASSERT_LE(codepoint, 0x10FFFFu);
-            input_ix += num_hex_digits;
-            if (U16_LENGTH(codepoint) == 1) {
-                ASSERT_LE(output_ix + 1, buf_size);
-                buf[output_ix++] = codepoint;
-            } else {
-                // UTF-16 encoding
-                ASSERT_LE(output_ix + 2, buf_size);
-                buf[output_ix++] = U16_LEAD(codepoint);
-                buf[output_ix++] = U16_TRAIL(codepoint);
-            }
-            break;
-        }
-        case ' ':
-            input_ix++;
-            break;
-        case '|':
-            ASSERT_FALSE(seen_offset);
-            ASSERT_NE(offset, nullptr);
-            *offset = output_ix;
-            seen_offset = true;
-            input_ix++;
-            break;
-        default:
-            FAIL();  // unexpected character
-        }
-    }
-    ASSERT_NE(result_size, nullptr);
-    *result_size = output_ix;
-    ASSERT_TRUE(seen_offset || offset == nullptr);
-}
-
-TEST(UnicodeUtils, parse) {
-    const size_t BUF_SIZE = 256;
-    uint16_t buf[BUF_SIZE];
-    size_t offset;
-    size_t size;
-    ParseUnicode(buf, BUF_SIZE, "U+000D U+1F431 | 'a'", &size, &offset);
-    EXPECT_EQ(size, 4u);
-    EXPECT_EQ(offset, 3u);
-    EXPECT_EQ(buf[0], 0x000D);
-    EXPECT_EQ(buf[1], 0xD83D);
-    EXPECT_EQ(buf[2], 0xDC31);
-    EXPECT_EQ(buf[3], 'a');
-}

diff --git a/tests/data/MultiAxis.ttf b/tests/data/MultiAxis.ttf
new file mode 100644
index 0000000..1d687cb
--- /dev/null
+++ b/tests/data/MultiAxis.ttf
Binary files differ

diff --git a/tests/data/MultiAxis.ttx b/tests/data/MultiAxis.ttx
new file mode 100644
index 0000000..7c17198
--- /dev/null
+++ b/tests/data/MultiAxis.ttx

@@ -0,0 +1,223 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Copyright (C) 2017 The Android Open Source Project
+
+     Licensed under the Apache License, Version 2.0 (the "License");
+     you may not use this file except in compliance with the License.
+     You may obtain a copy of the License at
+
+          http://www.apache.org/licenses/LICENSE-2.0
+
+     Unless required by applicable law or agreed to in writing, software
+     distributed under the License is distributed on an "AS IS" BASIS,
+     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     See the License for the specific language governing permissions and
+     limitations under the License.
+-->
+<ttFont sfntVersion="\x00\x01\x00\x00" ttLibVersion="3.0">
+
+  <GlyphOrder>
+    <!-- The 'id' attribute is only for humans; it is ignored when parsed. -->
+    <GlyphID id="0" name=".notdef"/>
+    <GlyphID id="1" name="default"/>
+  </GlyphOrder>
+
+  <head>
+    <!-- Most of this table will be recalculated by the compiler -->
+    <tableVersion value="1.0"/>
+    <fontRevision value="1.0"/>
+    <checkSumAdjustment value="0x640cdb2f"/>
+    <magicNumber value="0x5f0f3cf5"/>
+    <flags value="00000000 00000011"/>
+    <unitsPerEm value="1000"/>
+    <created value="Wed Sep  9 08:01:17 2015"/>
+    <modified value="Wed Sep  9 08:48:07 2015"/>
+    <xMin value="30"/>
+    <yMin value="-200"/>
+    <xMax value="629"/>
+    <yMax value="800"/>
+    <macStyle value="00000000 00000000"/>
+    <lowestRecPPEM value="7"/>
+    <fontDirectionHint value="2"/>
+    <indexToLocFormat value="0"/>
+    <glyphDataFormat value="0"/>
+  </head>
+
+  <hhea>
+    <tableVersion value="1.0"/>
+    <ascent value="1000"/>
+    <descent value="-200"/>
+    <lineGap value="0"/>
+    <advanceWidthMax value="659"/>
+    <minLeftSideBearing value="0"/>
+    <minRightSideBearing value="30"/>
+    <xMaxExtent value="629"/>
+    <caretSlopeRise value="1"/>
+    <caretSlopeRun value="0"/>
+    <caretOffset value="0"/>
+    <reserved0 value="0"/>
+    <reserved1 value="0"/>
+    <reserved2 value="0"/>
+    <reserved3 value="0"/>
+    <metricDataFormat value="0"/>
+    <numberOfHMetrics value="18"/>
+  </hhea>
+
+  <maxp>
+    <!-- Most of this table will be recalculated by the compiler -->
+    <tableVersion value="0x10000"/>
+    <numGlyphs value="54"/>
+    <maxPoints value="73"/>
+    <maxContours value="10"/>
+    <maxCompositePoints value="0"/>
+    <maxCompositeContours value="0"/>
+    <maxZones value="2"/>
+    <maxTwilightPoints value="12"/>
+    <maxStorage value="28"/>
+    <maxFunctionDefs value="119"/>
+    <maxInstructionDefs value="0"/>
+    <maxStackElements value="61"/>
+    <maxSizeOfInstructions value="2967"/>
+    <maxComponentElements value="0"/>
+    <maxComponentDepth value="0"/>
+  </maxp>
+
+  <OS_2>
+    <!-- The fields 'usFirstCharIndex' and 'usLastCharIndex'
+         will be recalculated by the compiler -->
+    <version value="3"/>
+    <xAvgCharWidth value="594"/>
+    <usWeightClass value="400"/>
+    <usWidthClass value="5"/>
+    <fsType value="00000000 00001000"/>
+    <ySubscriptXSize value="650"/>
+    <ySubscriptYSize value="600"/>
+    <ySubscriptXOffset value="0"/>
+    <ySubscriptYOffset value="75"/>
+    <ySuperscriptXSize value="650"/>
+    <ySuperscriptYSize value="600"/>
+    <ySuperscriptXOffset value="0"/>
+    <ySuperscriptYOffset value="350"/>
+    <yStrikeoutSize value="50"/>
+    <yStrikeoutPosition value="300"/>
+    <sFamilyClass value="0"/>
+    <panose>
+      <bFamilyType value="0"/>
+      <bSerifStyle value="0"/>
+      <bWeight value="5"/>
+      <bProportion value="0"/>
+      <bContrast value="0"/>
+      <bStrokeVariation value="0"/>
+      <bArmStyle value="0"/>
+      <bLetterForm value="0"/>
+      <bMidline value="0"/>
+      <bXHeight value="0"/>
+    </panose>
+    <ulUnicodeRange1 value="00000000 00000000 00000000 00000001"/>
+    <ulUnicodeRange2 value="00000000 00000000 00000000 00000000"/>
+    <ulUnicodeRange3 value="00000000 00000000 00000000 00000000"/>
+    <ulUnicodeRange4 value="00000000 00000000 00000000 00000000"/>
+    <achVendID value="UKWN"/>
+    <fsSelection value="00000000 01000000"/>
+    <usFirstCharIndex value="32"/>
+    <usLastCharIndex value="122"/>
+    <sTypoAscender value="800"/>
+    <sTypoDescender value="-200"/>
+    <sTypoLineGap value="200"/>
+    <usWinAscent value="1000"/>
+    <usWinDescent value="200"/>
+    <ulCodePageRange1 value="00000000 00000000 00000000 00000001"/>
+    <ulCodePageRange2 value="00000000 00000000 00000000 00000000"/>
+    <sxHeight value="500"/>
+    <sCapHeight value="700"/>
+    <usDefaultChar value="0"/>
+    <usBreakChar value="32"/>
+    <usMaxContext value="0"/>
+  </OS_2>
+
+  <hmtx>
+    <mtx name=".notdef" width="500" lsb="93"/>
+    <mtx name="default" width="500" lsb="93"/>
+  </hmtx>
+
+  <cmap>
+    <tableVersion version="0"/>
+    <cmap_format_4 platformID="3" platEncID="10" language="0">
+      <map code="0x0061" name="default" />
+    </cmap_format_4>
+  </cmap>
+
+  <loca>
+    <!-- The 'loca' table will be calculated by the compiler -->
+  </loca>
+
+  <glyf>
+
+    <!-- The xMin, yMin, xMax and yMax values
+         will be recalculated by the compiler. -->
+
+    <TTGlyph name=".notdef" xMin="0" yMin="0" xMax="0" yMax="0">
+      <contour></contour><instructions><assembly></assembly></instructions>
+    </TTGlyph>
+
+    <TTGlyph name="default" xMin="0" yMin="0" xMax="0" yMax="0">
+      <contour></contour><instructions><assembly></assembly></instructions>
+    </TTGlyph>
+  </glyf>
+
+  <fvar>
+    <Axis>
+      <AxisTag>wdth</AxisTag>
+      <MinValue>-1.0</MinValue>
+      <DefaultValue>0.0</DefaultValue>
+      <MaxValue>1.0</MaxValue>
+      <NameID>256</NameID>
+    </Axis>
+    <Axis>
+      <AxisTag>wght</AxisTag>
+      <MinValue>-1.0</MinValue>
+      <DefaultValue>0.0</DefaultValue>
+      <MaxValue>1.0</MaxValue>
+      <NameID>256</NameID>
+    </Axis>
+  </fvar>
+
+  <name>
+    <namerecord nameID="1" platformID="1" platEncID="0" langID="0x0" unicode="True">
+      MultiAxisFont Test
+    </namerecord>
+    <namerecord nameID="2" platformID="1" platEncID="0" langID="0x0" unicode="True">
+      MultiAxis
+    </namerecord>
+    <namerecord nameID="4" platformID="1" platEncID="0" langID="0x0" unicode="True">
+      MultiAxisFont Test
+    </namerecord>
+    <namerecord nameID="6" platformID="1" platEncID="0" langID="0x0" unicode="True">
+      MultiAxisFontTest-Regular
+    </namerecord>
+    <namerecord nameID="1" platformID="3" platEncID="1" langID="0x409">
+      MultiAxisFont Test
+    </namerecord>
+    <namerecord nameID="2" platformID="3" platEncID="1" langID="0x409">
+      MultiAxis
+    </namerecord>
+    <namerecord nameID="4" platformID="3" platEncID="1" langID="0x409">
+      MultiAxisFont Test
+    </namerecord>
+    <namerecord nameID="6" platformID="3" platEncID="1" langID="0x409">
+      MultiAxisFontTest-Regular
+    </namerecord>
+  </name>
+
+  <post>
+    <formatType value="3.0"/>
+    <italicAngle value="0.0"/>
+    <underlinePosition value="-75"/>
+    <underlineThickness value="50"/>
+    <isFixedPitch value="0"/>
+    <minMemType42 value="0"/>
+    <maxMemType42 value="0"/>
+    <minMemType1 value="0"/>
+    <maxMemType1 value="0"/>
+  </post>
+
+</ttFont>

diff --git a/tests/data/NoCmapFormat14.ttf b/tests/data/NoCmapFormat14.ttf
new file mode 100644
index 0000000..2a0c46c
--- /dev/null
+++ b/tests/data/NoCmapFormat14.ttf
Binary files differ

diff --git a/tests/data/NoCmapFormat14.ttx b/tests/data/NoCmapFormat14.ttx
new file mode 100644
index 0000000..3c7411b
--- /dev/null
+++ b/tests/data/NoCmapFormat14.ttx

@@ -0,0 +1,207 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Copyright (C) 2016 The Android Open Source Project
+
+     Licensed under the Apache License, Version 2.0 (the "License");
+     you may not use this file except in compliance with the License.
+     You may obtain a copy of the License at
+
+          http://www.apache.org/licenses/LICENSE-2.0
+
+     Unless required by applicable law or agreed to in writing, software
+     distributed under the License is distributed on an "AS IS" BASIS,
+     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     See the License for the specific language governing permissions and
+     limitations under the License.
+-->
+<ttFont sfntVersion="\x00\x01\x00\x00" ttLibVersion="3.0">
+
+  <GlyphOrder>
+    <!-- The 'id' attribute is only for humans; it is ignored when parsed. -->
+    <GlyphID id="0" name=".notdef"/>
+    <GlyphID id="1" name="defaultGlyph"/>
+  </GlyphOrder>
+
+  <head>
+    <!-- Most of this table will be recalculated by the compiler -->
+    <tableVersion value="1.0"/>
+    <fontRevision value="1.0"/>
+    <checkSumAdjustment value="0x640cdb2f"/>
+    <magicNumber value="0x5f0f3cf5"/>
+    <flags value="00000000 00000011"/>
+    <unitsPerEm value="1000"/>
+    <created value="Wed Sep  9 08:01:17 2015"/>
+    <modified value="Wed Sep  9 08:48:07 2015"/>
+    <xMin value="30"/>
+    <yMin value="-200"/>
+    <xMax value="629"/>
+    <yMax value="800"/>
+    <macStyle value="00000000 00000000"/>
+    <lowestRecPPEM value="7"/>
+    <fontDirectionHint value="2"/>
+    <indexToLocFormat value="0"/>
+    <glyphDataFormat value="0"/>
+  </head>
+
+  <hhea>
+    <tableVersion value="1.0"/>
+    <ascent value="1000"/>
+    <descent value="-200"/>
+    <lineGap value="0"/>
+    <advanceWidthMax value="659"/>
+    <minLeftSideBearing value="0"/>
+    <minRightSideBearing value="30"/>
+    <xMaxExtent value="629"/>
+    <caretSlopeRise value="1"/>
+    <caretSlopeRun value="0"/>
+    <caretOffset value="0"/>
+    <reserved0 value="0"/>
+    <reserved1 value="0"/>
+    <reserved2 value="0"/>
+    <reserved3 value="0"/>
+    <metricDataFormat value="0"/>
+    <numberOfHMetrics value="18"/>
+  </hhea>
+
+  <maxp>
+    <!-- Most of this table will be recalculated by the compiler -->
+    <tableVersion value="0x10000"/>
+    <numGlyphs value="54"/>
+    <maxPoints value="73"/>
+    <maxContours value="10"/>
+    <maxCompositePoints value="0"/>
+    <maxCompositeContours value="0"/>
+    <maxZones value="2"/>
+    <maxTwilightPoints value="12"/>
+    <maxStorage value="28"/>
+    <maxFunctionDefs value="119"/>
+    <maxInstructionDefs value="0"/>
+    <maxStackElements value="61"/>
+    <maxSizeOfInstructions value="2967"/>
+    <maxComponentElements value="0"/>
+    <maxComponentDepth value="0"/>
+  </maxp>
+
+  <OS_2>
+    <!-- The fields 'usFirstCharIndex' and 'usLastCharIndex'
+         will be recalculated by the compiler -->
+    <version value="3"/>
+    <xAvgCharWidth value="594"/>
+    <usWeightClass value="400"/>
+    <usWidthClass value="5"/>
+    <fsType value="00000000 00001000"/>
+    <ySubscriptXSize value="650"/>
+    <ySubscriptYSize value="600"/>
+    <ySubscriptXOffset value="0"/>
+    <ySubscriptYOffset value="75"/>
+    <ySuperscriptXSize value="650"/>
+    <ySuperscriptYSize value="600"/>
+    <ySuperscriptXOffset value="0"/>
+    <ySuperscriptYOffset value="350"/>
+    <yStrikeoutSize value="50"/>
+    <yStrikeoutPosition value="300"/>
+    <sFamilyClass value="0"/>
+    <panose>
+      <bFamilyType value="0"/>
+      <bSerifStyle value="0"/>
+      <bWeight value="5"/>
+      <bProportion value="0"/>
+      <bContrast value="0"/>
+      <bStrokeVariation value="0"/>
+      <bArmStyle value="0"/>
+      <bLetterForm value="0"/>
+      <bMidline value="0"/>
+      <bXHeight value="0"/>
+    </panose>
+    <ulUnicodeRange1 value="00000000 00000000 00000000 00000001"/>
+    <ulUnicodeRange2 value="00000000 00000000 00000000 00000000"/>
+    <ulUnicodeRange3 value="00000000 00000000 00000000 00000000"/>
+    <ulUnicodeRange4 value="00000000 00000000 00000000 00000000"/>
+    <achVendID value="UKWN"/>
+    <fsSelection value="00000000 01000000"/>
+    <usFirstCharIndex value="32"/>
+    <usLastCharIndex value="122"/>
+    <sTypoAscender value="800"/>
+    <sTypoDescender value="-200"/>
+    <sTypoLineGap value="200"/>
+    <usWinAscent value="1000"/>
+    <usWinDescent value="200"/>
+    <ulCodePageRange1 value="00000000 00000000 00000000 00000001"/>
+    <ulCodePageRange2 value="00000000 00000000 00000000 00000000"/>
+    <sxHeight value="500"/>
+    <sCapHeight value="700"/>
+    <usDefaultChar value="0"/>
+    <usBreakChar value="32"/>
+    <usMaxContext value="0"/>
+  </OS_2>
+
+  <hmtx>
+    <mtx name=".notdef" width="500" lsb="93"/>
+    <mtx name="defaultGlyph" width="500" lsb="93"/>
+  </hmtx>
+
+  <cmap>
+    <tableVersion version="0"/>
+    <cmap_format_4 platformID="3" platEncID="10" language="0">
+      <map code="0x5380" name="defaultGlyph" />
+    </cmap_format_4>
+    <!-- Do not add cmap_format_14 here since this font is designed for testing purposes. -->
+  </cmap>
+
+  <loca>
+    <!-- The 'loca' table will be calculated by the compiler -->
+  </loca>
+
+  <glyf>
+
+    <!-- The xMin, yMin, xMax and yMax values
+         will be recalculated by the compiler. -->
+
+    <TTGlyph name=".notdef" xMin="0" yMin="0" xMax="0" yMax="0">
+      <contour></contour><instructions><assembly></assembly></instructions>
+    </TTGlyph>
+
+    <TTGlyph name="defaultGlyph" xMin="0" yMin="0" xMax="0" yMax="0">
+      <contour></contour><instructions><assembly></assembly></instructions>
+    </TTGlyph>
+  </glyf>
+
+  <name>
+    <namerecord nameID="1" platformID="1" platEncID="0" langID="0x0" unicode="True">
+      No Cmap Format 14 Subtable Test
+    </namerecord>
+    <namerecord nameID="2" platformID="1" platEncID="0" langID="0x0" unicode="True">
+      Regular
+    </namerecord>
+    <namerecord nameID="4" platformID="1" platEncID="0" langID="0x0" unicode="True">
+      No Cmap Format 14 Subtable Test
+    </namerecord>
+    <namerecord nameID="6" platformID="1" platEncID="0" langID="0x0" unicode="True">
+      No Cmap Format 14 SubtableTest-Regular
+    </namerecord>
+    <namerecord nameID="1" platformID="3" platEncID="1" langID="0x409">
+      No Cmap Format 14 Subtable Test
+    </namerecord>
+    <namerecord nameID="2" platformID="3" platEncID="1" langID="0x409">
+      Regular
+    </namerecord>
+    <namerecord nameID="4" platformID="3" platEncID="1" langID="0x409">
+      No Cmap Format 14 Subtable Test
+    </namerecord>
+    <namerecord nameID="6" platformID="3" platEncID="1" langID="0x409">
+      No Cmap Format 14 SubtableTest-Regular
+    </namerecord>
+  </name>
+
+  <post>
+    <formatType value="3.0"/>
+    <italicAngle value="0.0"/>
+    <underlinePosition value="-75"/>
+    <underlineThickness value="50"/>
+    <isFixedPitch value="0"/>
+    <minMemType42 value="0"/>
+    <maxMemType42 value="0"/>
+    <minMemType1 value="0"/>
+    <maxMemType1 value="0"/>
+  </post>
+
+</ttFont>

diff --git a/tests/data/UnicodeBMPOnly.ttf b/tests/data/UnicodeBMPOnly.ttf
new file mode 100644
index 0000000..8196669
--- /dev/null
+++ b/tests/data/UnicodeBMPOnly.ttf
Binary files differ

diff --git a/tests/data/UnicodeBMPOnly.ttx b/tests/data/UnicodeBMPOnly.ttx
new file mode 100644
index 0000000..b50a3f0
--- /dev/null
+++ b/tests/data/UnicodeBMPOnly.ttx

@@ -0,0 +1,177 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Copyright (C) 2017 The Android Open Source Project
+
+     Licensed under the Apache License, Version 2.0 (the "License");
+     you may not use this file except in compliance with the License.
+     You may obtain a copy of the License at
+
+          http://www.apache.org/licenses/LICENSE-2.0
+
+     Unless required by applicable law or agreed to in writing, software
+     distributed under the License is distributed on an "AS IS" BASIS,
+     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     See the License for the specific language governing permissions and
+     limitations under the License.
+-->
+<ttFont sfntVersion="\x00\x01\x00\x00" ttLibVersion="3.0">
+
+  <GlyphOrder>
+    <GlyphID id="0" name=".notdef"/>
+    <GlyphID id="1" name="a"/>
+  </GlyphOrder>
+
+  <head>
+    <tableVersion value="1.0"/>
+    <fontRevision value="1.0"/>
+    <checkSumAdjustment value="0x640cdb2f"/>
+    <magicNumber value="0x5f0f3cf5"/>
+    <flags value="00000000 00000011"/>
+    <unitsPerEm value="1000"/>
+    <created value="Fri Mar 17 07:31:00 2017"/>
+    <macStyle value="00000000 00000000"/>
+    <lowestRecPPEM value="7"/>
+    <fontDirectionHint value="2"/>
+    <glyphDataFormat value="0"/>
+  </head>
+
+  <hhea>
+    <tableVersion value="1.0"/>
+    <ascent value="1000"/>
+    <descent value="-200"/>
+    <lineGap value="0"/>
+    <caretSlopeRise value="1"/>
+    <caretSlopeRun value="0"/>
+    <caretOffset value="0"/>
+    <reserved0 value="0"/>
+    <reserved1 value="0"/>
+    <reserved2 value="0"/>
+    <reserved3 value="0"/>
+    <metricDataFormat value="0"/>
+  </hhea>
+
+  <maxp>
+    <tableVersion value="0x10000"/>
+    <maxZones value="0"/>
+    <maxTwilightPoints value="0"/>
+    <maxStorage value="0"/>
+    <maxFunctionDefs value="0"/>
+    <maxInstructionDefs value="0"/>
+    <maxStackElements value="0"/>
+    <maxSizeOfInstructions value="0"/>
+    <maxComponentElements value="0"/>
+  </maxp>
+
+  <OS_2>
+    <!-- The fields 'usFirstCharIndex' and 'usLastCharIndex'
+         will be recalculated by the compiler -->
+    <version value="3"/>
+    <xAvgCharWidth value="594"/>
+    <usWeightClass value="400"/>
+    <usWidthClass value="5"/>
+    <fsType value="00000000 00001000"/>
+    <ySubscriptXSize value="650"/>
+    <ySubscriptYSize value="600"/>
+    <ySubscriptXOffset value="0"/>
+    <ySubscriptYOffset value="75"/>
+    <ySuperscriptXSize value="650"/>
+    <ySuperscriptYSize value="600"/>
+    <ySuperscriptXOffset value="0"/>
+    <ySuperscriptYOffset value="350"/>
+    <yStrikeoutSize value="50"/>
+    <yStrikeoutPosition value="300"/>
+    <sFamilyClass value="0"/>
+    <panose>
+      <bFamilyType value="0"/>
+      <bSerifStyle value="0"/>
+      <bWeight value="5"/>
+      <bProportion value="0"/>
+      <bContrast value="0"/>
+      <bStrokeVariation value="0"/>
+      <bArmStyle value="0"/>
+      <bLetterForm value="0"/>
+      <bMidline value="0"/>
+      <bXHeight value="0"/>
+    </panose>
+    <ulUnicodeRange1 value="00000000 00000000 00000000 00000001"/>
+    <ulUnicodeRange2 value="00000000 00000000 00000000 00000000"/>
+    <ulUnicodeRange3 value="00000000 00000000 00000000 00000000"/>
+    <ulUnicodeRange4 value="00000000 00000000 00000000 00000000"/>
+    <achVendID value="UKWN"/>
+    <fsSelection value="00000000 01000000"/>
+    <usFirstCharIndex value="32"/>
+    <usLastCharIndex value="122"/>
+    <sTypoAscender value="800"/>
+    <sTypoDescender value="-200"/>
+    <sTypoLineGap value="200"/>
+    <usWinAscent value="1000"/>
+    <usWinDescent value="200"/>
+    <ulCodePageRange1 value="00000000 00000000 00000000 00000001"/>
+    <ulCodePageRange2 value="00000000 00000000 00000000 00000000"/>
+    <sxHeight value="500"/>
+    <sCapHeight value="700"/>
+    <usDefaultChar value="0"/>
+    <usBreakChar value="32"/>
+    <usMaxContext value="0"/>
+  </OS_2>
+
+  <hmtx>
+    <mtx name=".notdef" width="500" lsb="93"/>
+    <mtx name="a" width="500" lsb="93"/>
+  </hmtx>
+
+  <cmap>
+    <tableVersion version="0"/>
+    <cmap_format_4 platformID="0" platEncID="1" language="0">
+      <map code="0x0061" name="a" />
+    </cmap_format_4>
+  </cmap>
+
+  <loca>
+    <!-- The 'loca' table will be calculated by the compiler -->
+  </loca>
+
+  <glyf>
+    <TTGlyph name=".notdef" xMin="0" yMin="0" xMax="0" yMax="0" />
+    <TTGlyph name="a" xMin="0" yMin="0" xMax="0" yMax="0" />
+  </glyf>
+
+  <name>
+    <namerecord nameID="1" platformID="1" platEncID="0" langID="0x0" unicode="True">
+      Sample Font
+    </namerecord>
+    <namerecord nameID="2" platformID="1" platEncID="0" langID="0x0" unicode="True">
+      Regular
+    </namerecord>
+    <namerecord nameID="4" platformID="1" platEncID="0" langID="0x0" unicode="True">
+      Sample Font
+    </namerecord>
+    <namerecord nameID="6" platformID="1" platEncID="0" langID="0x0" unicode="True">
+      SampleFont-Regular
+    </namerecord>
+    <namerecord nameID="1" platformID="3" platEncID="1" langID="0x409">
+      Sample Font
+    </namerecord>
+    <namerecord nameID="2" platformID="3" platEncID="1" langID="0x409">
+      Regular
+    </namerecord>
+    <namerecord nameID="4" platformID="3" platEncID="1" langID="0x409">
+      Sample Font
+    </namerecord>
+    <namerecord nameID="6" platformID="3" platEncID="1" langID="0x409">
+      SampleFont-Regular
+    </namerecord>
+  </name>
+
+  <post>
+    <formatType value="3.0"/>
+    <italicAngle value="0.0"/>
+    <underlinePosition value="-75"/>
+    <underlineThickness value="50"/>
+    <isFixedPitch value="0"/>
+    <minMemType42 value="0"/>
+    <maxMemType42 value="0"/>
+    <minMemType1 value="0"/>
+    <maxMemType1 value="0"/>
+  </post>
+
+</ttFont>

diff --git a/tests/data/UnicodeBMPOnly2.ttf b/tests/data/UnicodeBMPOnly2.ttf
new file mode 100644
index 0000000..c14b195
--- /dev/null
+++ b/tests/data/UnicodeBMPOnly2.ttf
Binary files differ

diff --git a/tests/data/UnicodeBMPOnly2.ttx b/tests/data/UnicodeBMPOnly2.ttx
new file mode 100644
index 0000000..e43ebf8
--- /dev/null
+++ b/tests/data/UnicodeBMPOnly2.ttx

@@ -0,0 +1,177 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Copyright (C) 2017 The Android Open Source Project
+
+     Licensed under the Apache License, Version 2.0 (the "License");
+     you may not use this file except in compliance with the License.
+     You may obtain a copy of the License at
+
+          http://www.apache.org/licenses/LICENSE-2.0
+
+     Unless required by applicable law or agreed to in writing, software
+     distributed under the License is distributed on an "AS IS" BASIS
+     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     See the License for the specific language governing permissions and
+     limitations under the License.
+-->
+<ttFont sfntVersion="\x00\x01\x00\x00" ttLibVersion="3.0">
+
+  <GlyphOrder>
+    <GlyphID id="0" name=".notdef"/>
+    <GlyphID id="1" name="a"/>
+  </GlyphOrder>
+
+  <head>
+    <tableVersion value="1.0"/>
+    <fontRevision value="1.0"/>
+    <checkSumAdjustment value="0x640cdb2f"/>
+    <magicNumber value="0x5f0f3cf5"/>
+    <flags value="00000000 00000011"/>
+    <unitsPerEm value="1000"/>
+    <created value="Fri Mar 17 07:31:00 2017"/>
+    <macStyle value="00000000 00000000"/>
+    <lowestRecPPEM value="7"/>
+    <fontDirectionHint value="2"/>
+    <glyphDataFormat value="0"/>
+  </head>
+
+  <hhea>
+    <tableVersion value="1.0"/>
+    <ascent value="1000"/>
+    <descent value="-200"/>
+    <lineGap value="0"/>
+    <caretSlopeRise value="1"/>
+    <caretSlopeRun value="0"/>
+    <caretOffset value="0"/>
+    <reserved0 value="0"/>
+    <reserved1 value="0"/>
+    <reserved2 value="0"/>
+    <reserved3 value="0"/>
+    <metricDataFormat value="0"/>
+  </hhea>
+
+  <maxp>
+    <tableVersion value="0x10000"/>
+    <maxZones value="0"/>
+    <maxTwilightPoints value="0"/>
+    <maxStorage value="0"/>
+    <maxFunctionDefs value="0"/>
+    <maxInstructionDefs value="0"/>
+    <maxStackElements value="0"/>
+    <maxSizeOfInstructions value="0"/>
+    <maxComponentElements value="0"/>
+  </maxp>
+
+  <OS_2>
+    <!-- The fields 'usFirstCharIndex' and 'usLastCharIndex'
+         will be recalculated by the compiler -->
+    <version value="3"/>
+    <xAvgCharWidth value="594"/>
+    <usWeightClass value="400"/>
+    <usWidthClass value="5"/>
+    <fsType value="00000000 00001000"/>
+    <ySubscriptXSize value="650"/>
+    <ySubscriptYSize value="600"/>
+    <ySubscriptXOffset value="0"/>
+    <ySubscriptYOffset value="75"/>
+    <ySuperscriptXSize value="650"/>
+    <ySuperscriptYSize value="600"/>
+    <ySuperscriptXOffset value="0"/>
+    <ySuperscriptYOffset value="350"/>
+    <yStrikeoutSize value="50"/>
+    <yStrikeoutPosition value="300"/>
+    <sFamilyClass value="0"/>
+    <panose>
+      <bFamilyType value="0"/>
+      <bSerifStyle value="0"/>
+      <bWeight value="5"/>
+      <bProportion value="0"/>
+      <bContrast value="0"/>
+      <bStrokeVariation value="0"/>
+      <bArmStyle value="0"/>
+      <bLetterForm value="0"/>
+      <bMidline value="0"/>
+      <bXHeight value="0"/>
+    </panose>
+    <ulUnicodeRange1 value="00000000 00000000 00000000 00000001"/>
+    <ulUnicodeRange2 value="00000000 00000000 00000000 00000000"/>
+    <ulUnicodeRange3 value="00000000 00000000 00000000 00000000"/>
+    <ulUnicodeRange4 value="00000000 00000000 00000000 00000000"/>
+    <achVendID value="UKWN"/>
+    <fsSelection value="00000000 01000000"/>
+    <usFirstCharIndex value="32"/>
+    <usLastCharIndex value="122"/>
+    <sTypoAscender value="800"/>
+    <sTypoDescender value="-200"/>
+    <sTypoLineGap value="200"/>
+    <usWinAscent value="1000"/>
+    <usWinDescent value="200"/>
+    <ulCodePageRange1 value="00000000 00000000 00000000 00000001"/>
+    <ulCodePageRange2 value="00000000 00000000 00000000 00000000"/>
+    <sxHeight value="500"/>
+    <sCapHeight value="700"/>
+    <usDefaultChar value="0"/>
+    <usBreakChar value="32"/>
+    <usMaxContext value="0"/>
+  </OS_2>
+
+  <hmtx>
+    <mtx name=".notdef" width="500" lsb="93"/>
+    <mtx name="a" width="500" lsb="93"/>
+  </hmtx>
+
+  <cmap>
+    <tableVersion version="0"/>
+    <cmap_format_4 platformID="0" platEncID="2" language="0">
+      <map code="0x0061" name="a" />
+    </cmap_format_4>
+  </cmap>
+
+  <loca>
+    <!-- The 'loca' table will be calculated by the compiler -->
+  </loca>
+
+  <glyf>
+    <TTGlyph name=".notdef" xMin="0" yMin="0" xMax="0" yMax="0" />
+    <TTGlyph name="a" xMin="0" yMin="0" xMax="0" yMax="0" />
+  </glyf>
+
+  <name>
+    <namerecord nameID="1" platformID="1" platEncID="0" langID="0x0" unicode="True">
+      Sample Font
+    </namerecord>
+    <namerecord nameID="2" platformID="1" platEncID="0" langID="0x0" unicode="True">
+      Regular
+    </namerecord>
+    <namerecord nameID="4" platformID="1" platEncID="0" langID="0x0" unicode="True">
+      Sample Font
+    </namerecord>
+    <namerecord nameID="6" platformID="1" platEncID="0" langID="0x0" unicode="True">
+      SampleFont-Regular
+    </namerecord>
+    <namerecord nameID="1" platformID="3" platEncID="1" langID="0x409">
+      Sample Font
+    </namerecord>
+    <namerecord nameID="2" platformID="3" platEncID="1" langID="0x409">
+      Regular
+    </namerecord>
+    <namerecord nameID="4" platformID="3" platEncID="1" langID="0x409">
+      Sample Font
+    </namerecord>
+    <namerecord nameID="6" platformID="3" platEncID="1" langID="0x409">
+      SampleFont-Regular
+    </namerecord>
+  </name>
+
+  <post>
+    <formatType value="3.0"/>
+    <italicAngle value="0.0"/>
+    <underlinePosition value="-75"/>
+    <underlineThickness value="50"/>
+    <isFixedPitch value="0"/>
+    <minMemType42 value="0"/>
+    <maxMemType42 value="0"/>
+    <minMemType1 value="0"/>
+    <maxMemType1 value="0"/>
+  </post>
+
+</ttFont>

diff --git a/tests/data/UnicodeUCS4.ttf b/tests/data/UnicodeUCS4.ttf
new file mode 100644
index 0000000..354e1a3
--- /dev/null
+++ b/tests/data/UnicodeUCS4.ttf
Binary files differ

diff --git a/tests/data/UnicodeUCS4.ttx b/tests/data/UnicodeUCS4.ttx
new file mode 100644
index 0000000..da5575d
--- /dev/null
+++ b/tests/data/UnicodeUCS4.ttx

@@ -0,0 +1,181 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Copyright (C) 2017 The Android Open Source Project
+
+     Licensed under the Apache License, Version 2.0 (the "License");
+     you may not use this file except in compliance with the License.
+     You may obtain a copy of the License at
+
+          http://www.apache.org/licenses/LICENSE-2.0
+
+     Unless required by applicable law or agreed to in writing, software
+     distributed under the License is distributed on an "AS IS" BASIS
+     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     See the License for the specific language governing permissions and
+     limitations under the License.
+-->
+<ttFont sfntVersion="\x00\x01\x00\x00" ttLibVersion="3.0">
+
+  <GlyphOrder>
+    <GlyphID id="0" name=".notdef"/>
+    <GlyphID id="1" name="a"/>
+  </GlyphOrder>
+
+  <head>
+    <tableVersion value="1.0"/>
+    <fontRevision value="1.0"/>
+    <checkSumAdjustment value="0x640cdb2f"/>
+    <magicNumber value="0x5f0f3cf5"/>
+    <flags value="00000000 00000011"/>
+    <unitsPerEm value="1000"/>
+    <created value="Fri Mar 17 07:31:00 2017"/>
+    <macStyle value="00000000 00000000"/>
+    <lowestRecPPEM value="7"/>
+    <fontDirectionHint value="2"/>
+    <glyphDataFormat value="0"/>
+  </head>
+
+  <hhea>
+    <tableVersion value="1.0"/>
+    <ascent value="1000"/>
+    <descent value="-200"/>
+    <lineGap value="0"/>
+    <caretSlopeRise value="1"/>
+    <caretSlopeRun value="0"/>
+    <caretOffset value="0"/>
+    <reserved0 value="0"/>
+    <reserved1 value="0"/>
+    <reserved2 value="0"/>
+    <reserved3 value="0"/>
+    <metricDataFormat value="0"/>
+  </hhea>
+
+  <maxp>
+    <tableVersion value="0x10000"/>
+    <maxZones value="0"/>
+    <maxTwilightPoints value="0"/>
+    <maxStorage value="0"/>
+    <maxFunctionDefs value="0"/>
+    <maxInstructionDefs value="0"/>
+    <maxStackElements value="0"/>
+    <maxSizeOfInstructions value="0"/>
+    <maxComponentElements value="0"/>
+  </maxp>
+
+  <OS_2>
+    <!-- The fields 'usFirstCharIndex' and 'usLastCharIndex'
+         will be recalculated by the compiler -->
+    <version value="3"/>
+    <xAvgCharWidth value="594"/>
+    <usWeightClass value="400"/>
+    <usWidthClass value="5"/>
+    <fsType value="00000000 00001000"/>
+    <ySubscriptXSize value="650"/>
+    <ySubscriptYSize value="600"/>
+    <ySubscriptXOffset value="0"/>
+    <ySubscriptYOffset value="75"/>
+    <ySuperscriptXSize value="650"/>
+    <ySuperscriptYSize value="600"/>
+    <ySuperscriptXOffset value="0"/>
+    <ySuperscriptYOffset value="350"/>
+    <yStrikeoutSize value="50"/>
+    <yStrikeoutPosition value="300"/>
+    <sFamilyClass value="0"/>
+    <panose>
+      <bFamilyType value="0"/>
+      <bSerifStyle value="0"/>
+      <bWeight value="5"/>
+      <bProportion value="0"/>
+      <bContrast value="0"/>
+      <bStrokeVariation value="0"/>
+      <bArmStyle value="0"/>
+      <bLetterForm value="0"/>
+      <bMidline value="0"/>
+      <bXHeight value="0"/>
+    </panose>
+    <ulUnicodeRange1 value="00000000 00000000 00000000 00000001"/>
+    <ulUnicodeRange2 value="00000000 00000000 00000000 00000000"/>
+    <ulUnicodeRange3 value="00000000 00000000 00000000 00000000"/>
+    <ulUnicodeRange4 value="00000000 00000000 00000000 00000000"/>
+    <achVendID value="UKWN"/>
+    <fsSelection value="00000000 01000000"/>
+    <usFirstCharIndex value="32"/>
+    <usLastCharIndex value="122"/>
+    <sTypoAscender value="800"/>
+    <sTypoDescender value="-200"/>
+    <sTypoLineGap value="200"/>
+    <usWinAscent value="1000"/>
+    <usWinDescent value="200"/>
+    <ulCodePageRange1 value="00000000 00000000 00000000 00000001"/>
+    <ulCodePageRange2 value="00000000 00000000 00000000 00000000"/>
+    <sxHeight value="500"/>
+    <sCapHeight value="700"/>
+    <usDefaultChar value="0"/>
+    <usBreakChar value="32"/>
+    <usMaxContext value="0"/>
+  </OS_2>
+
+  <hmtx>
+    <mtx name=".notdef" width="500" lsb="93"/>
+    <mtx name="a" width="500" lsb="93"/>
+  </hmtx>
+
+  <cmap>
+    <tableVersion version="0"/>
+    <cmap_format_4 platformID="0" platEncID="1" language="0">
+      <map code="0x0061" name="a" />
+    </cmap_format_4>
+    <cmap_format_12 format="12" reserved="0" length="2" nGroups="1" platformID="0" platEncID="4" language="0">
+      <map code="0x0061" name="a" />
+      <map code="0x1F926" name="a" />
+    </cmap_format_12>
+  </cmap>
+
+  <loca>
+    <!-- The 'loca' table will be calculated by the compiler -->
+  </loca>
+
+  <glyf>
+    <TTGlyph name=".notdef" xMin="0" yMin="0" xMax="0" yMax="0" />
+    <TTGlyph name="a" xMin="0" yMin="0" xMax="0" yMax="0" />
+  </glyf>
+
+  <name>
+    <namerecord nameID="1" platformID="1" platEncID="0" langID="0x0" unicode="True">
+      Sample Font
+    </namerecord>
+    <namerecord nameID="2" platformID="1" platEncID="0" langID="0x0" unicode="True">
+      Regular
+    </namerecord>
+    <namerecord nameID="4" platformID="1" platEncID="0" langID="0x0" unicode="True">
+      Sample Font
+    </namerecord>
+    <namerecord nameID="6" platformID="1" platEncID="0" langID="0x0" unicode="True">
+      SampleFont-Regular
+    </namerecord>
+    <namerecord nameID="1" platformID="3" platEncID="1" langID="0x409">
+      Sample Font
+    </namerecord>
+    <namerecord nameID="2" platformID="3" platEncID="1" langID="0x409">
+      Regular
+    </namerecord>
+    <namerecord nameID="4" platformID="3" platEncID="1" langID="0x409">
+      Sample Font
+    </namerecord>
+    <namerecord nameID="6" platformID="3" platEncID="1" langID="0x409">
+      SampleFont-Regular
+    </namerecord>
+  </name>
+
+  <post>
+    <formatType value="3.0"/>
+    <italicAngle value="0.0"/>
+    <underlinePosition value="-75"/>
+    <underlineThickness value="50"/>
+    <isFixedPitch value="0"/>
+    <minMemType42 value="0"/>
+    <maxMemType42 value="0"/>
+    <minMemType1 value="0"/>
+    <maxMemType1 value="0"/>
+  </post>
+
+</ttFont>

diff --git a/tests/data/VariationSelectorTest-Regular.ttf b/tests/data/VariationSelectorTest-Regular.ttf
new file mode 100644
index 0000000..0504c67
--- /dev/null
+++ b/tests/data/VariationSelectorTest-Regular.ttf
Binary files differ

diff --git a/tests/data/VarioationSelectorTest-Regular.ttx b/tests/data/VariationSelectorTest-Regular.ttx
similarity index 65%
rename from tests/data/VarioationSelectorTest-Regular.ttx
rename to tests/data/VariationSelectorTest-Regular.ttx
index a063a5e..f86f008 100644
--- a/tests/data/VarioationSelectorTest-Regular.ttx
+++ b/tests/data/VariationSelectorTest-Regular.ttx

@@ -18,18 +18,7 @@
   <GlyphOrder>
     <!-- The 'id' attribute is only for humans; it is ignored when parsed. -->
     <GlyphID id="0" name=".notdef"/>
-    <GlyphID id="1" name="BaseChar1"/>
-    <GlyphID id="2" name="BaseChar1_VS1"/>
-    <GlyphID id="3" name="BaseChar1_VS17"/>
-    <GlyphID id="4" name="BaseChar1_VS18"/>
-    <GlyphID id="5" name="BaseChar2"/>
-    <GlyphID id="6" name="BaseChar2_VS2"/>
-    <GlyphID id="7" name="BaseChar2_VS18"/>
-    <GlyphID id="8" name="BaseChar2_VS19"/>
-    <GlyphID id="9" name="BaseChar3"/>
-    <GlyphID id="10" name="BaseChar4_VS3"/>
-    <GlyphID id="11" name="BaseChar4_VS19"/>
-    <GlyphID id="12" name="BaseChar4_VS20"/>
+    <GlyphID id="1" name="defaultGlyph"/>
   </GlyphOrder>
 
   <head>
@@ -147,45 +136,36 @@
 
   <hmtx>
     <mtx name=".notdef" width="500" lsb="93"/>
-    <mtx name="BaseChar1" width="500" lsb="93"/>
-    <mtx name="BaseChar1_VS1" width="500" lsb="93"/>
-    <mtx name="BaseChar1_VS17" width="500" lsb="93"/>
-    <mtx name="BaseChar1_VS18" width="500" lsb="93"/>
-    <mtx name="BaseChar2" width="500" lsb="93"/>
-    <mtx name="BaseChar2_VS2" width="500" lsb="93"/>
-    <mtx name="BaseChar2_VS18" width="500" lsb="93"/>
-    <mtx name="BaseChar2_VS19" width="500" lsb="93"/>
-    <mtx name="BaseChar3" width="500" lsb="93"/>
-    <mtx name="BaseChar4_VS3" width="500" lsb="93"/>
-    <mtx name="BaseChar4_VS19" width="500" lsb="93"/>
-    <mtx name="BaseChar4_VS20" width="500" lsb="93"/>
+    <mtx name="defaultGlyph" width="500" lsb="93"/>
   </hmtx>
 
   <cmap>
     <tableVersion version="0"/>
     <cmap_format_4 platformID="3" platEncID="10" language="0">
-      <map code="0x82A6" name="BaseChar1" />
-      <map code="0x845B" name="BaseChar2" />
-      <map code="0x537F" name="BaseChar3" />
+      <map code="0x82A6" name="defaultGlyph" />
+      <map code="0x845B" name="defaultGlyph" />
+      <map code="0x537F" name="defaultGlyph" />
+      <map code="0x5380" name="defaultGlyph" />
     </cmap_format_4>
     <cmap_format_14 format="14" platformID="0" platEncID="5" length="40" numVarSelectorRecords="3">
-      <map uvs="0xFE00" uv="0x82A6" name="BaseChar1_VS1" />
-      <map uvs="0xE0100" uv="0x82A6" name="BaseChar1_VS17" />
-      <map uvs="0xE0101" uv="0x82A6" name="BaseChar1_VS18" />
+      <map uvs="0xFE00" uv="0x82A6" name="None" />
+      <map uvs="0xE0100" uv="0x82A6" name="None" />
+      <map uvs="0xE0101" uv="0x82A6" name="None" />
       <map uvs="0xE0102" uv="0x82A6" name="None" />
 
-      <map uvs="0xFE01" uv="0x845B" name="BaseChar2_VS2" />
-      <map uvs="0xE0101" uv="0x845B" name="BaseChar2_VS18" />
-      <map uvs="0xE0102" uv="0x845B" name="BaseChar2_VS19" />
+      <map uvs="0xFE01" uv="0x845B" name="None" />
+      <map uvs="0xE0101" uv="0x845B" name="None" />
+      <map uvs="0xE0102" uv="0x845B" name="None" />
       <map uvs="0xE0103" uv="0x845B" name="None" />
 
-      <map uvs="0xFE02" uv="0x717D" name="BaseChar4_VS3" />
-      <map uvs="0xE0102" uv="0x717D" name="BaseChar4_VS19" />
-      <map uvs="0xE0103" uv="0x717D" name="BaseChar4_VS20" />
+      <map uvs="0xFE02" uv="0x717D" name="defaultGlyph" />
+      <map uvs="0xE0102" uv="0x717D" name="defaultGlyph" />
+      <map uvs="0xE0103" uv="0x717D" name="defaultGlyph" />
       <!-- There is no default glyph for U+717D U+E0104 but there is a entry for
            default UVS entry.  hasVariationSelector should return false in this
            case.  -->
       <map uvs="0xE0104" uv="0x717D" name="None" />
+      <!-- Do not add entry for U+5380. -->
     </cmap_format_14>
   </cmap>
 
@@ -202,40 +182,7 @@
       <contour></contour><instructions><assembly></assembly></instructions>
     </TTGlyph>
 
-    <TTGlyph name="BaseChar1" xMin="0" yMin="0" xMax="0" yMax="0">
-      <contour></contour><instructions><assembly></assembly></instructions>
-    </TTGlyph>
-    <TTGlyph name="BaseChar1_VS1" xMin="0" yMin="0" xMax="0" yMax="0">
-      <contour></contour><instructions><assembly></assembly></instructions>
-    </TTGlyph>
-    <TTGlyph name="BaseChar1_VS17" xMin="0" yMin="0" xMax="0" yMax="0">
-      <contour></contour><instructions><assembly></assembly></instructions>
-    </TTGlyph>
-    <TTGlyph name="BaseChar1_VS18" xMin="0" yMin="0" xMax="0" yMax="0">
-      <contour></contour><instructions><assembly></assembly></instructions>
-    </TTGlyph>
-    <TTGlyph name="BaseChar2" xMin="0" yMin="0" xMax="0" yMax="0">
-      <contour></contour><instructions><assembly></assembly></instructions>
-    </TTGlyph>
-    <TTGlyph name="BaseChar2_VS2" xMin="0" yMin="0" xMax="0" yMax="0">
-      <contour></contour><instructions><assembly></assembly></instructions>
-    </TTGlyph>
-    <TTGlyph name="BaseChar2_VS18" xMin="0" yMin="0" xMax="0" yMax="0">
-      <contour></contour><instructions><assembly></assembly></instructions>
-    </TTGlyph>
-    <TTGlyph name="BaseChar2_VS19" xMin="0" yMin="0" xMax="0" yMax="0">
-      <contour></contour><instructions><assembly></assembly></instructions>
-    </TTGlyph>
-    <TTGlyph name="BaseChar3" xMin="0" yMin="0" xMax="0" yMax="0">
-      <contour></contour><instructions><assembly></assembly></instructions>
-    </TTGlyph>
-    <TTGlyph name="BaseChar4_VS3" xMin="0" yMin="0" xMax="0" yMax="0">
-      <contour></contour><instructions><assembly></assembly></instructions>
-    </TTGlyph>
-    <TTGlyph name="BaseChar4_VS19" xMin="0" yMin="0" xMax="0" yMax="0">
-      <contour></contour><instructions><assembly></assembly></instructions>
-    </TTGlyph>
-    <TTGlyph name="BaseChar4_VS20" xMin="0" yMin="0" xMax="0" yMax="0">
+    <TTGlyph name="defaultGlyph" xMin="0" yMin="0" xMax="0" yMax="0">
       <contour></contour><instructions><assembly></assembly></instructions>
     </TTGlyph>
   </glyf>

diff --git a/tests/data/VarioationSelectorTest-Regular.ttf b/tests/data/VarioationSelectorTest-Regular.ttf
deleted file mode 100644
index dfb0b2d..0000000
--- a/tests/data/VarioationSelectorTest-Regular.ttf
+++ /dev/null
Binary files differ

diff --git a/tests/how_to_run.txt b/tests/how_to_run.txt
deleted file mode 100644
index bee367b..0000000
--- a/tests/how_to_run.txt
+++ /dev/null

@@ -1,5 +0,0 @@
-mmm -j8 frameworks/minikin/tests &&
-adb push $OUT/data/nativetest/minikin_tests/minikin_tests \
-    /data/nativetest/minikin_tests/minikin_tests &&
-adb push frameworks/minikin/tests/data /data/nativetest/minikin_tests/ &&
-adb shell /data/nativetest/minikin_tests/minikin_tests

diff --git a/tests/perftests/Android.mk b/tests/perftests/Android.mk
new file mode 100644
index 0000000..c60123a
--- /dev/null
+++ b/tests/perftests/Android.mk

@@ -0,0 +1,52 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+LOCAL_PATH := $(call my-dir)
+
+# Sources of the benchmark binary: the shared test helpers under ../util plus one
+# benchmark file per component and the benchmark entry point.
+perftest_src_files := \
+  ../util/FileUtils.cpp \
+  ../util/FontTestUtils.cpp \
+  ../util/MinikinFontForTest.cpp \
+  ../util/UnicodeUtils.cpp \
+  FontCollection.cpp \
+  FontFamily.cpp \
+  FontLanguage.cpp \
+  GraphemeBreak.cpp \
+  Hyphenator.cpp \
+  WordBreaker.cpp \
+  main.cpp
+
+include $(CLEAR_VARS)
+LOCAL_MODULE := minikin_perftests
+# Warnings are enabled broadly and treated as errors.
+LOCAL_CPPFLAGS := -Werror -Wall -Wextra
+LOCAL_SRC_FILES := $(perftest_src_files)
+LOCAL_STATIC_LIBRARIES := \
+  libminikin \
+  libxml2
+
+LOCAL_SHARED_LIBRARIES := \
+  libharfbuzz_ng \
+  libicuuc \
+  liblog \
+  libskia
+
+# $(LOCAL_PATH)/../ lets the benchmarks include the helpers as <util/...>.
+# NOTE(review): libs/minikin is presumably listed so that internal headers such as
+# MinikinInternal.h and FontLanguage.h resolve -- confirm.
+LOCAL_C_INCLUDES := \
+  $(LOCAL_PATH)/../ \
+  $(LOCAL_PATH)/../../libs/minikin \
+  external/harfbuzz_ng/src \
+  external/libxml2/include
+
+include $(BUILD_NATIVE_BENCHMARK)

diff --git a/tests/perftests/FontCollection.cpp b/tests/perftests/FontCollection.cpp
new file mode 100644
index 0000000..fd95cf1
--- /dev/null
+++ b/tests/perftests/FontCollection.cpp

@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <benchmark/benchmark.h>
+
+#include <memory>
+
+#include <minikin/FontCollection.h>
+#include <util/FontTestUtils.h>
+#include <util/UnicodeUtils.h>
+#include <MinikinInternal.h>
+
+namespace minikin {
+
+const char* SYSTEM_FONT_PATH = "/system/fonts/";
+const char* SYSTEM_FONT_XML = "/system/etc/fonts.xml";
+
+// Times building a FontCollection from the system font families. The families are loaded
+// once, before the timed loop; each iteration constructs a collection and drops it at once.
+static void BM_FontCollection_construct(benchmark::State& state) {
+    std::vector<std::shared_ptr<FontFamily>> systemFamilies =
+            getFontFamilies(SYSTEM_FONT_PATH, SYSTEM_FONT_XML);
+    while (state.KeepRunning()) {
+        std::make_shared<FontCollection>(systemFamilies);
+    }
+}
+
+BENCHMARK(BM_FontCollection_construct);
+
+// Times FontCollection::hasVariationSelector() for one (base, variation selector) pair taken
+// from the two benchmark arguments. The collection is built once, outside the timed loop.
+static void BM_FontCollection_hasVariationSelector(benchmark::State& state) {
+    std::shared_ptr<FontCollection> systemFonts(
+            getFontCollection(SYSTEM_FONT_PATH, SYSTEM_FONT_XML));
+
+    uint32_t baseCodePoint = state.range(0);
+    uint32_t variationSelector = state.range(1);
+
+    // Label the run with the code point pair so the report rows can be told apart.
+    char label[64];
+    snprintf(label, sizeof(label), "hasVariationSelector U+%04X,U+%04X", baseCodePoint,
+             variationSelector);
+    state.SetLabel(label);
+
+    while (state.KeepRunning()) {
+        systemFonts->hasVariationSelector(baseCodePoint, variationSelector);
+    }
+}
+
+// TODO: Rewrite with BENCHMARK_CAPTURE for better test name.
+BENCHMARK(BM_FontCollection_hasVariationSelector)
+      ->ArgPair(0x2708, 0xFE0F)
+      ->ArgPair(0x2708, 0xFE0E)
+      ->ArgPair(0x3402, 0xE0100);
+
+// Inputs for BM_FontCollection_itemize; the benchmark argument selects an entry by index.
+struct ItemizeTestCases {
+    // Text to itemize, written in the notation that ParseUnicode() is given
+    // (quoted characters such as 'A' and code points such as U+4E16).
+    std::string itemizeText;
+    // Language list handed to FontStyle::registerLanguageList().
+    std::string languageTag;
+    // Human-readable name appended to the benchmark label.
+    std::string labelText;
+} ITEMIZE_TEST_CASES[] = {
+    { "'A' 'n' 'd' 'r' 'o' 'i' 'd'", "en", "English" },
+    { "U+4E16", "zh-Hans", "CJK Ideograph" },
+    { "U+4E16", "zh-Hans,zh-Hant,ja,en,es,pt,fr,de", "CJK Ideograph with many language fallback" },
+    { "U+3402 U+E0100", "ja", "CJK Ideograph with variation selector" },
+    { "'A' 'n' U+0E1A U+0E31 U+0645 U+062D U+0648", "en", "Mixture of English, Thai and Arabic" },
+    { "U+2708 U+FE0E", "en", "Emoji with variation selector" },
+    { "U+0031 U+FE0F U+20E3", "en", "KEYCAP" },
+};
+
+// Times FontCollection::itemize() on the ITEMIZE_TEST_CASES entry selected by the benchmark
+// argument. Parsing the text and registering the language list happen before the timed loop.
+static void BM_FontCollection_itemize(benchmark::State& state) {
+    std::shared_ptr<FontCollection> systemFonts(
+            getFontCollection(SYSTEM_FONT_PATH, SYSTEM_FONT_XML));
+
+    size_t caseIndex = state.range(0);
+    ItemizeTestCases& testCase = ITEMIZE_TEST_CASES[caseIndex];
+    state.SetLabel("Itemize: " + testCase.labelText);
+
+    uint16_t utf16Text[64];
+    size_t utf16Length = 0;
+    ParseUnicode(utf16Text, 64, testCase.itemizeText.c_str(), &utf16Length, nullptr);
+    std::vector<FontCollection::Run> runs;
+    FontStyle style(FontStyle::registerLanguageList(testCase.languageTag));
+
+    // NOTE(review): gMinikinLock is held across the whole loop, presumably because itemize()
+    // expects its caller to hold it -- confirm against FontCollection.
+    android::AutoMutex _l(gMinikinLock);
+    while (state.KeepRunning()) {
+        // Reuse the vector so each iteration starts from an empty result.
+        runs.clear();
+        systemFonts->itemize(utf16Text, utf16Length, style, &runs);
+    }
+}
+
+// TODO: Rewrite with BENCHMARK_CAPTURE once it is available in Android.
+BENCHMARK(BM_FontCollection_itemize)
+    ->Arg(0)->Arg(1)->Arg(2)->Arg(3)->Arg(4)->Arg(5)->Arg(6);
+
+}  // namespace minikin

diff --git a/tests/perftests/FontFamily.cpp b/tests/perftests/FontFamily.cpp
new file mode 100644
index 0000000..9ab61e1
--- /dev/null
+++ b/tests/perftests/FontFamily.cpp

@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <benchmark/benchmark.h>
+
+#include <minikin/FontFamily.h>
+#include "../util/MinikinFontForTest.h"
+
+namespace minikin {
+
+// Times constructing a FontFamily that holds a single font. The test font object is created
+// once up front; every iteration builds a fresh one-element Font vector and a new family.
+static void BM_FontFamily_create(benchmark::State& state) {
+    auto testFont =
+            std::make_shared<MinikinFontForTest>("/system/fonts/NotoSansCJK-Regular.ttc", 0);
+
+    while (state.KeepRunning()) {
+        auto family = std::make_shared<FontFamily>(
+                std::vector<Font>({Font(testFont, FontStyle())}));
+    }
+}
+
+BENCHMARK(BM_FontFamily_create);
+
+}  // namespace minikin

diff --git a/tests/perftests/FontLanguage.cpp b/tests/perftests/FontLanguage.cpp
new file mode 100644
index 0000000..6c9c84d
--- /dev/null
+++ b/tests/perftests/FontLanguage.cpp

@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <benchmark/benchmark.h>
+
+#include "FontLanguage.h"
+
+namespace minikin {
+
+// Times constructing a FontLanguage from the tag "en-US".
+static void BM_FontLanguage_en_US(benchmark::State& state) {
+    while (state.KeepRunning()) {
+        // 5 equals the character count of "en-US" (presumably the tag length -- confirm).
+        FontLanguage language("en-US", 5);
+    }
+}
+BENCHMARK(BM_FontLanguage_en_US);
+
+// Times constructing a FontLanguage from a tag that also carries a script subtag.
+static void BM_FontLanguage_en_Latn_US(benchmark::State& state) {
+    while (state.KeepRunning()) {
+        // 10 equals the character count of "en-Latn-US" (presumably the tag length -- confirm).
+        FontLanguage language("en-Latn-US", 10);
+    }
+}
+BENCHMARK(BM_FontLanguage_en_Latn_US);
+
+// Times constructing a FontLanguage from a tag with a "-u-em-emoji" extension suffix.
+static void BM_FontLanguage_en_Latn_US_u_em_emoji(benchmark::State& state) {
+    while (state.KeepRunning()) {
+        // 21 equals the character count of the tag (presumably the tag length -- confirm).
+        FontLanguage language("en-Latn-US-u-em-emoji", 21);
+    }
+}
+BENCHMARK(BM_FontLanguage_en_Latn_US_u_em_emoji);
+
+}  // namespace minikin

diff --git a/tests/perftests/GraphemeBreak.cpp b/tests/perftests/GraphemeBreak.cpp
new file mode 100644
index 0000000..6d6cf5b
--- /dev/null
+++ b/tests/perftests/GraphemeBreak.cpp

@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <benchmark/benchmark.h>
+
+#include <cutils/log.h>
+
+#include "minikin/GraphemeBreak.h"
+#include "util/UnicodeUtils.h"
+
+namespace minikin {
+
+const char* ASCII_TEST_STR = "'L' 'o' 'r' 'e' 'm' ' ' 'i' 'p' 's' 'u' 'm' '.'";
+// U+261D: WHITE UP POINTING INDEX
+// U+1F3FD: EMOJI MODIFIER FITZPATRICK TYPE-4
+const char* EMOJI_TEST_STR = "U+261D U+1F3FD U+261D U+1F3FD U+261D U+1F3FD U+261D U+1F3FD";
+// U+1F1FA: REGIONAL INDICATOR SYMBOL LETTER U
+// U+1F1F8: REGIONAL INDICATOR SYMBOL LETTER S
+const char* FLAGS_TEST_STR = "U+1F1FA U+1F1F8 U+1F1FA U+1F1F8 U+1F1FA U+1F1F8";
+
+// Shared body of the grapheme break benchmarks in this file.
+//
+// testStr is written in the notation handed to ParseUnicode() and must expand to exactly
+// kTestStrUtf16Length UTF-16 code units; the run aborts otherwise. The offset to query is
+// taken from the benchmark argument, and only isGraphemeBreak() runs inside the timed loop.
+// TODO: Migrate to BENCHMARK_CAPTURE for parameterizing.
+static void runGraphemeBreakBenchmark(benchmark::State& state, const char* testStr) {
+    const size_t kTestStrUtf16Length = 12;
+    // Initialized so the length check below never reads an indeterminate value.
+    size_t result_size = 0;
+    uint16_t buffer[kTestStrUtf16Length];
+    ParseUnicode(buffer, kTestStrUtf16Length, testStr, &result_size, nullptr);
+    LOG_ALWAYS_FATAL_IF(result_size != kTestStrUtf16Length);
+    const size_t testIndex = state.range(0);
+    while (state.KeepRunning()) {
+        GraphemeBreak::isGraphemeBreak(nullptr, buffer, 0, result_size, testIndex);
+    }
+}
+
+// Grapheme break queries on plain ASCII text.
+static void BM_GraphemeBreak_Ascii(benchmark::State& state) {
+    runGraphemeBreakBenchmark(state, ASCII_TEST_STR);
+}
+BENCHMARK(BM_GraphemeBreak_Ascii)
+    ->Arg(0)  // Beginning of the text.
+    ->Arg(1)  // Middle of the text.
+    ->Arg(12);  // End of the text.
+
+// Grapheme break queries on emoji followed by emoji modifiers.
+static void BM_GraphemeBreak_Emoji(benchmark::State& state) {
+    runGraphemeBreakBenchmark(state, EMOJI_TEST_STR);
+}
+BENCHMARK(BM_GraphemeBreak_Emoji)
+    ->Arg(1)  // Middle of emoji modifier sequence.
+    ->Arg(2)  // Middle of the surrogate pairs.
+    ->Arg(3);  // After emoji modifier sequence. Here is boundary of grapheme cluster.
+
+// Grapheme break queries on regional indicator (flag) sequences.
+static void BM_GraphemeBreak_Emoji_Flags(benchmark::State& state) {
+    runGraphemeBreakBenchmark(state, FLAGS_TEST_STR);
+}
+BENCHMARK(BM_GraphemeBreak_Emoji_Flags)
+    ->Arg(2)  // Middle of flag sequence.
+    ->Arg(4)  // After flag sequence. Here is boundary of grapheme cluster.
+    ->Arg(10); // Middle of 3rd flag sequence.
+
+}  // namespace minikin

diff --git a/tests/perftests/Hyphenator.cpp b/tests/perftests/Hyphenator.cpp
new file mode 100644
index 0000000..2107e05
--- /dev/null
+++ b/tests/perftests/Hyphenator.cpp

@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <benchmark/benchmark.h>
+
+#include <minikin/Hyphenator.h>
+#include <util/FileUtils.h>
+#include <util/UnicodeUtils.h>
+
+namespace minikin {
+
+const char* enUsHyph = "/system/usr/hyphen-data/hyph-en-us.hyb";  // Pattern file to load.
+const int enUsMinPrefix = 2;  // Passed to Hyphenator::loadBinary() as minPrefix.
+const int enUsMinSuffix = 3;  // Passed to Hyphenator::loadBinary() as minSuffix.
+const icu::Locale& usLocale = icu::Locale::getUS();  // Locale handed to hyphenate().
+
+static void BM_Hyphenator_short_word(benchmark::State& state) {
+    // Keep the file contents alive while the Hyphenator is used; it may point into them.
+    auto hybData = readWholeFile(enUsHyph);
+    Hyphenator* hyphenator = Hyphenator::loadBinary(hybData.data(), enUsMinPrefix, enUsMinSuffix);
+    std::vector<uint16_t> word = utf8ToUtf16("hyphen");
+    std::vector<HyphenationType> result;
+    while (state.KeepRunning()) {
+        hyphenator->hyphenate(&result, word.data(), word.size(), usLocale);
+    }
+    Hyphenator::loadBinary(nullptr, 2, 2);  // NOTE(review): result is discarded; confirm intent.
+}
+// TODO: Use BENCHMARK_CAPTURE for parametrise.
+BENCHMARK(BM_Hyphenator_short_word);
+
+static void BM_Hyphenator_long_word(benchmark::State& state) {
+    // Keep the file contents alive while the Hyphenator is used; it may point into them.
+    auto hybData = readWholeFile(enUsHyph);
+    Hyphenator* hyphenator = Hyphenator::loadBinary(hybData.data(), enUsMinPrefix, enUsMinSuffix);
+    std::vector<uint16_t> word = utf8ToUtf16(
+            "Pneumonoultramicroscopicsilicovolcanoconiosis");
+    std::vector<HyphenationType> result;
+    while (state.KeepRunning()) {
+        hyphenator->hyphenate(&result, word.data(), word.size(), usLocale);
+    }
+    Hyphenator::loadBinary(nullptr, 2, 2);  // NOTE(review): result is discarded; confirm intent.
+}
+// TODO: Use BENCHMARK_CAPTURE for parametrise.
+BENCHMARK(BM_Hyphenator_long_word);
+
+// TODO: Add more tests for other languages.
+
+}  // namespace minikin

diff --git a/tests/perftests/WordBreaker.cpp b/tests/perftests/WordBreaker.cpp
new file mode 100644
index 0000000..6758cf9
--- /dev/null
+++ b/tests/perftests/WordBreaker.cpp

@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <benchmark/benchmark.h>
+
+#include "minikin/WordBreaker.h"
+#include "util/UnicodeUtils.h"
+
+namespace minikin {
+
+// Benchmarks setting an English sentence on a WordBreaker and stepping through every break.
+static void BM_WordBreaker_English(benchmark::State& state) {
+    WordBreaker breaker;
+    breaker.setLocale(icu::Locale::getEnglish());
+    std::vector<uint16_t> utf16 = utf8ToUtf16(
+            "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do "
+            "eiusmod tempor incididunt ut labore et dolore magna aliqua.");
+    while (state.KeepRunning()) {
+        breaker.setText(utf16.data(), utf16.size());
+        while (breaker.next() != -1) {}  // Loop ends when next() returns -1.
+    }
+}
+BENCHMARK(BM_WordBreaker_English);
+
+// TODO: Add more tests for other languages.
+
+}  // namespace minikin

diff --git a/tests/perftests/how_to_run.txt b/tests/perftests/how_to_run.txt
new file mode 100644
index 0000000..f55a8ac
--- /dev/null
+++ b/tests/perftests/how_to_run.txt

@@ -0,0 +1,3 @@
+mmm -j8 frameworks/minikin/tests/perftests &&
+adb sync data &&
+adb shell /data/benchmarktest/minikin_perftests/minikin_perftests

diff --git a/tests/perftests/main.cpp b/tests/perftests/main.cpp
new file mode 100644
index 0000000..e6f9d14
--- /dev/null
+++ b/tests/perftests/main.cpp

@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <benchmark/benchmark.h>
+
+#include <cutils/log.h>
+
+#include <unicode/uclean.h>
+#include <unicode/udata.h>
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+int main(int argc, char** argv) {  // Hands the ICU data file to ICU, then runs the benchmarks.
+    const char* fn = "/system/usr/icu/" U_ICUDATA_NAME ".dat";
+    int fd = open(fn, O_RDONLY);
+    LOG_ALWAYS_FATAL_IF(fd == -1);
+    struct stat st;
+    LOG_ALWAYS_FATAL_IF(fstat(fd, &st) != 0);
+    void* data = mmap(nullptr, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
+    LOG_ALWAYS_FATAL_IF(data == MAP_FAILED);  // mmap reports failure as MAP_FAILED, not null.
+    UErrorCode errorCode = U_ZERO_ERROR;
+    udata_setCommonData(data, &errorCode);
+    LOG_ALWAYS_FATAL_IF(U_FAILURE(errorCode));
+    u_init(&errorCode);
+    LOG_ALWAYS_FATAL_IF(U_FAILURE(errorCode));
+
+    benchmark::Initialize(&argc, argv);
+    benchmark::RunSpecifiedBenchmarks();
+
+    u_cleanup();  // The mapping and fd are never released explicitly; process exit reclaims them.
+    return 0;
+}

diff --git a/tests/stresstest/Android.mk b/tests/stresstest/Android.mk
new file mode 100644
index 0000000..b655a74
--- /dev/null
+++ b/tests/stresstest/Android.mk

@@ -0,0 +1,57 @@
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# see how_to_run.txt for instructions on running these tests
+
+LOCAL_PATH := $(call my-dir)
+
+include $(CLEAR_VARS)
+
+LOCAL_TEST_DATA := $(foreach f,$(LOCAL_TEST_DATA),frameworks/minikin/tests:$(f))
+
+LOCAL_MODULE := minikin_stress_tests
+LOCAL_MODULE_TAGS := tests
+LOCAL_MODULE_CLASS := NATIVE_TESTS
+
+LOCAL_STATIC_LIBRARIES := libminikin
+
+# Shared libraries which are dependencies of minikin; these are not automatically
+# pulled in by the build system (and thus sadly must be repeated).
+
+LOCAL_SHARED_LIBRARIES := \
+    libskia \
+    libft2 \
+    libharfbuzz_ng \
+    libicuuc \
+    liblog \
+    libutils \
+    libz
+
+LOCAL_STATIC_LIBRARIES += \
+    libxml2
+
+LOCAL_SRC_FILES += \
+    ../util/FontTestUtils.cpp \
+    ../util/MinikinFontForTest.cpp \
+    FontFamilyTest.cpp \
+    MultithreadTest.cpp \
+
+LOCAL_C_INCLUDES := \
+    $(LOCAL_PATH)/../../libs/minikin/ \
+    $(LOCAL_PATH)/../util \
+    external/libxml2/include \
+
+LOCAL_CPPFLAGS += -Werror -Wall -Wextra
+
+include $(BUILD_NATIVE_TEST)

diff --git a/tests/stresstest/FontFamilyTest.cpp b/tests/stresstest/FontFamilyTest.cpp
new file mode 100644
index 0000000..9d289e5
--- /dev/null
+++ b/tests/stresstest/FontFamilyTest.cpp

@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "../util/FontTestUtils.h"
+#include "../util/MinikinFontForTest.h"
+#include "HbFontCache.h"
+#include "MinikinInternal.h"
+#include "minikin/FontCollection.h"
+#include "minikin/Layout.h"
+
+namespace minikin {
+
+typedef std::pair<std::string, int> TestParam;  // (font file path, TTC index)
+
+class FontFamilyHarfBuzzCompatibilityTest : public ::testing::TestWithParam<TestParam> {};
+// Verifies that FontFamily::hasGlyph() agrees with hb_font_get_glyph() on each test font.
+TEST_P(FontFamilyHarfBuzzCompatibilityTest, CoverageTest) {
+    const std::string& fontPath = GetParam().first;
+    int ttcIndex = GetParam().second;
+
+    std::shared_ptr<MinikinFont> font(new MinikinFontForTest(fontPath, ttcIndex));
+    std::shared_ptr<FontFamily> family =
+            std::make_shared<FontFamily>(std::vector<Font>({Font(font, FontStyle())}));
+
+    android::AutoMutex _l(gMinikinLock);  // NOTE(review): presumably needed by getHbFontLocked().
+    hb_font_t* hbFont = getHbFontLocked(font.get());
+    // Pass 1: no variation selector. EXPECT_EQ keeps going after a mismatch.
+    for (uint32_t codePoint = 0; codePoint < MAX_UNICODE_CODE_POINT; ++codePoint) {
+        uint32_t unusedGlyph;
+        EXPECT_EQ(family->hasGlyph(codePoint, 0 /* variation selector */),
+                static_cast<bool>(hb_font_get_glyph(hbFont, codePoint, 0 /* variation selector */,
+                        &unusedGlyph)));
+    }
+    // Pass 2: each variation selector. ASSERT_EQ stops the test at the first mismatch.
+    for (uint32_t vs = VS1; vs < VS256; ++vs) {
+        // Move to variation selectors supplements after variation selectors.
+        if (vs == VS16 + 1) {
+            vs = VS17;
+        }
+        for (uint32_t codePoint = 0; codePoint < MAX_UNICODE_CODE_POINT; ++codePoint) {
+            uint32_t unusedGlyph;
+            ASSERT_EQ(family->hasGlyph(codePoint, vs),
+                    static_cast<bool>(hb_font_get_glyph(hbFont, codePoint, vs, &unusedGlyph)))
+                << "Inconsistent Result: " << fontPath << "#" << ttcIndex
+                << ": U+" << std::hex << codePoint << " U+" << std::hex << vs
+                << " Minikin: " << family->hasGlyph(codePoint, vs)
+                << " HarfBuzz: "
+                << static_cast<bool>(hb_font_get_glyph(hbFont, codePoint, vs, &unusedGlyph));
+
+        }
+    }
+    hb_font_destroy(hbFont);  // NOTE(review): not reached when an ASSERT_EQ above fails.
+}
+
+INSTANTIATE_TEST_CASE_P(FontFamilyTest,
+        FontFamilyHarfBuzzCompatibilityTest,
+        ::testing::Values(
+                TestParam("/system/fonts/NotoSansCJK-Regular.ttc", 0),
+                TestParam("/system/fonts/NotoColorEmoji.ttf", 0)));
+}  // namespace minikin

diff --git a/tests/stresstest/MultithreadTest.cpp b/tests/stresstest/MultithreadTest.cpp
new file mode 100644
index 0000000..08c94b9
--- /dev/null
+++ b/tests/stresstest/MultithreadTest.cpp

@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <condition_variable>
+#include <mutex>
+#include <random>
+#include <thread>
+
+#include <cutils/log.h>
+
+#include "MinikinInternal.h"
+#include "minikin/FontCollection.h"
+#include "minikin/Layout.h"
+#include "../util/FontTestUtils.h"
+
+namespace minikin {
+
+const char* SYSTEM_FONT_PATH = "/system/fonts/";
+const char* SYSTEM_FONT_XML = "/system/etc/fonts.xml";
+
+constexpr int LAYOUT_COUNT_PER_COLLECTION = 500;  // Layouts run against each collection.
+constexpr int COLLECTION_COUNT_PER_THREAD = 15;  // Font collections loaded by each thread.
+constexpr int NUM_THREADS = 10;  // Worker threads started by the test.
+// Start gate: worker threads wait on gCv (under gMutex) until gReady becomes true.
+std::mutex gMutex;
+std::condition_variable gCv;
+bool gReady = false;
+
+// Returns |wordsInText| words of |lettersInWord| random letters in A-Z, separated by spaces.
+static std::vector<uint16_t> generateTestText(
+        std::mt19937* mt, int lettersInWord, int wordsInText) {
+    std::uniform_int_distribution<uint16_t> dist('A', 'Z');
+    std::vector<uint16_t> text;
+    // The size is not positive when no word is requested; reserve() would get a huge size_t.
+    const int expectedSize = (lettersInWord + 1) * wordsInText - 1;
+    if (expectedSize > 0) text.reserve(expectedSize);
+    for (int i = 0; i < wordsInText; ++i) {
+        if (i != 0) text.emplace_back(' ');
+        for (int j = 0; j < lettersInWord; ++j) {
+            text.emplace_back(dist(*mt));
+        }
+    }
+    return text;
+}
+
+static void thread_main(int tid) {  // Entry point of each stress-test worker thread.
+    {
+        // Wait until all threads are created.
+        std::unique_lock<std::mutex> lock(gMutex);
+        gCv.wait(lock, [] { return gReady; });
+    }
+
+    std::mt19937 mt(tid);  // Seeded with |tid|, so each thread's text sequence is repeatable.
+    MinikinPaint paint;
+
+    for (int i = 0; i < COLLECTION_COUNT_PER_THREAD; ++i) {
+        std::shared_ptr<FontCollection> collection(
+                getFontCollection(SYSTEM_FONT_PATH, SYSTEM_FONT_XML));
+
+        for (int j = 0; j < LAYOUT_COUNT_PER_COLLECTION; ++j) {
+            // Generates ten 3-letter words so that a word sometimes hits the cache.
+            Layout layout;
+            std::vector<uint16_t> text = generateTestText(&mt, 3, 10);
+            layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, FontStyle(),
+                    paint, collection);
+            std::vector<float> advances(text.size());
+            layout.getAdvances(advances.data());
+            for (size_t k = 0; k < advances.size(); ++k) {
+                // MinikinFontForTest always returns 10.0f for horizontal advance.
+                LOG_ALWAYS_FATAL_IF(advances[k] != 10.0f, "Memory corruption detected.");
+            }
+        }
+    }
+}
+
+TEST(MultithreadTest, ThreadSafeStressTest) {  // Runs thread_main on NUM_THREADS threads.
+    std::vector<std::thread> threads;
+
+    {
+        std::unique_lock<std::mutex> lock(gMutex);  // Held while spawning and setting gReady.
+        threads.reserve(NUM_THREADS);
+        for (int i = 0; i < NUM_THREADS; ++i) {
+            threads.emplace_back(&thread_main, i);
+        }
+        gReady = true;
+    }
+    gCv.notify_all();  // Wakes every thread waiting on the start gate.
+
+    for (auto& thread : threads) {
+        thread.join();
+    }
+}
+
+}  // namespace minikin

diff --git a/tests/stresstest/how_to_run.txt b/tests/stresstest/how_to_run.txt
new file mode 100644
index 0000000..ba4dbdf
--- /dev/null
+++ b/tests/stresstest/how_to_run.txt

@@ -0,0 +1,3 @@
+mmm -j8 frameworks/minikin/tests/stresstest &&
+adb sync data &&
+adb shell /data/nativetest/minikin_tests/minikin_stress_tests

diff --git a/tests/Android.mk b/tests/unittest/Android.mk
similarity index 66%
rename from tests/Android.mk
rename to tests/unittest/Android.mk
index b33631e..b817c46 100644
--- a/tests/Android.mk
+++ b/tests/unittest/Android.mk

@@ -18,41 +18,36 @@
 
 include $(CLEAR_VARS)
 
-data_root_for_test_zip := $(TARGET_OUT_DATA)/DATA/
-minikin_tests_subpath_from_data := nativetest/minikin_tests
-minikin_tests_root_in_device := /data/$(minikin_tests_subpath_from_data)
-minikin_tests_root_for_test_zip := $(data_root_for_test_zip)/$(minikin_tests_subpath_from_data)
-
-font_src_files := \
-    data/BoldItalic.ttf \
+LOCAL_TEST_DATA := \
     data/Bold.ttf \
+    data/BoldItalic.ttf \
     data/ColorEmojiFont.ttf \
     data/ColorTextMixedEmojiFont.ttf \
     data/Emoji.ttf \
     data/Italic.ttf \
     data/Ja.ttf \
     data/Ko.ttf \
+    data/MultiAxis.ttf \
+    data/NoCmapFormat14.ttf \
     data/NoGlyphFont.ttf \
     data/Regular.ttf \
     data/TextEmojiFont.ttf \
-    data/VarioationSelectorTest-Regular.ttf \
+    data/UnicodeBMPOnly.ttf \
+    data/UnicodeBMPOnly2.ttf \
+    data/UnicodeUCS4.ttf \
+    data/VariationSelectorTest-Regular.ttf \
     data/ZhHans.ttf \
     data/ZhHant.ttf \
+    data/emoji.xml \
     data/itemize.xml \
-    data/emoji.xml
+
+LOCAL_TEST_DATA := $(foreach f,$(LOCAL_TEST_DATA),frameworks/minikin/tests:$(f))
 
 LOCAL_MODULE := minikin_tests
 LOCAL_MODULE_TAGS := tests
-
-GEN := $(addprefix $(minikin_tests_root_for_test_zip)/, $(font_src_files))
-$(GEN): PRIVATE_PATH := $(LOCAL_PATH)
-$(GEN): PRIVATE_CUSTOM_TOOL = cp $< $@
-$(GEN): $(minikin_tests_root_for_test_zip)/data/% : $(LOCAL_PATH)/data/%
-	$(transform-generated-source)
-LOCAL_GENERATED_SOURCES += $(GEN)
+LOCAL_MODULE_CLASS := NATIVE_TESTS
 
 LOCAL_STATIC_LIBRARIES := libminikin
-LOCAL_PICKUP_FILES := $(data_root_for_test_zip)
 
 # Shared libraries which are dependencies of minikin; these are not automatically
 # pulled in by the build system (and thus sadly must be repeated).
@@ -70,26 +65,36 @@
     libxml2
 
 LOCAL_SRC_FILES += \
+    ../util/FileUtils.cpp \
+    ../util/FontTestUtils.cpp \
+    ../util/MinikinFontForTest.cpp \
+    ../util/UnicodeUtils.cpp \
+    CmapCoverageTest.cpp \
+    EmojiTest.cpp \
     FontCollectionTest.cpp \
     FontCollectionItemizeTest.cpp \
     FontFamilyTest.cpp \
     FontLanguageListCacheTest.cpp \
-    FontTestUtils.cpp \
     HbFontCacheTest.cpp \
-    MinikinFontForTest.cpp \
-    MinikinInternalTest.cpp \
+    HyphenatorTest.cpp \
     GraphemeBreakTests.cpp \
+    LayoutTest.cpp \
     LayoutUtilsTest.cpp \
-    UnicodeUtils.cpp \
+    MeasurementTests.cpp \
+    SparseBitSetTest.cpp \
+    UnicodeUtilsTest.cpp \
     WordBreakerTests.cpp
 
 LOCAL_C_INCLUDES := \
-    $(LOCAL_PATH)/../libs/minikin/ \
+    $(LOCAL_PATH)/../../libs/minikin/ \
+    $(LOCAL_PATH)/../util \
     external/harfbuzz_ng/src \
     external/libxml2/include \
     external/skia/src/core
 
-LOCAL_CPPFLAGS += -Werror -Wall -Wextra \
-    -DkTestFontDir="\"$(minikin_tests_root_in_device)/data/\""
+LOCAL_CPPFLAGS += -Werror -Wall -Wextra
+
+LOCAL_CPPFLAGS_32 += -DkTestFontDir="\"/data/nativetest/minikin_tests/data/\""
+LOCAL_CPPFLAGS_64 += -DkTestFontDir="\"/data/nativetest64/minikin_tests/data/\""
 
 include $(BUILD_NATIVE_TEST)

diff --git a/tests/unittest/CmapCoverageTest.cpp b/tests/unittest/CmapCoverageTest.cpp
new file mode 100644
index 0000000..fe2d7ba
--- /dev/null
+++ b/tests/unittest/CmapCoverageTest.cpp

@@ -0,0 +1,1111 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <random>
+
+#include <log/log.h>
+#include <gtest/gtest.h>
+#include <minikin/CmapCoverage.h>
+#include <minikin/SparseBitSet.h>
+
+#include "MinikinInternal.h"
+
+namespace minikin {
+
+static constexpr uint16_t VS_PLATFORM_ID = 0;  // OpenType cmap platform ID 0 (Unicode).
+static constexpr uint16_t VS_ENCODING_ID = 5;  // Platform 0 encoding 5 (variation sequences).
+
+size_t writeU8(uint8_t x, uint8_t* out, size_t offset) {  // Stores |x|; returns next offset.
+    out[offset++] = x;
+    return offset;
+}
+
+size_t writeU16(uint16_t x, uint8_t* out, size_t offset) {  // Big-endian; returns next offset.
+    out[offset++] = static_cast<uint8_t>(x >> 8);
+    out[offset++] = static_cast<uint8_t>(x & 0xFF);
+    return offset;
+}
+
+size_t writeI16(int16_t sx, uint8_t* out, size_t offset) {  // Signed variant of writeU16().
+    return writeU16(static_cast<uint16_t>(sx), out, offset);  // Same 16 bits, written as unsigned.
+}
+
+size_t writeU24(uint32_t x, uint8_t* out, size_t offset) {  // Low 24 bits of |x|, big-endian.
+    for (int shift = 16; shift >= 0; shift -= 8) {
+        out[offset++] = static_cast<uint8_t>(x >> shift);
+    }
+    return offset;
+}
+
+// Stores |x| as four big-endian bytes at out[offset..offset+3] and returns the next offset.
+size_t writeU32(uint32_t x, uint8_t* out, size_t offset) {
+    for (int shift = 24; shift >= 0; shift -= 8) {
+        out[offset++] = static_cast<uint8_t>(x >> shift);
+    }
+    return offset;
+}
+
+// Returns valid cmap format 4 table contents. Every glyph ID has the same value as its code
+// point (e.g. 'a' (U+0061) is mapped to glyph ID 0x0061).
+// 'ranges' is a flat list of (start, end) pairs; both ends are inclusive.
+static std::vector<uint8_t> buildCmapFormat4Table(const std::vector<uint16_t>& ranges) {
+    uint16_t segmentCount = ranges.size() / 2 + 1 /* +1 for end marker */;
+
+    const size_t numOfUint16 =
+        8 /* format, length, languages, segCountX2, searchRange, entrySelector, rangeShift, pad */ +
+        segmentCount * 4 /* endCount, startCount, idDelta, idRangeOffset */;
+    const size_t finalLength = sizeof(uint16_t) * numOfUint16;
+
+    std::vector<uint8_t> out(finalLength);
+    size_t head = 0;
+    head = writeU16(4, out.data(), head);  // format
+    head = writeU16(finalLength, out.data(), head);  // length
+    head = writeU16(0, out.data(), head);  // language
+    // searchRange is 2 * (largest power of two that is <= segmentCount).
+    const uint16_t searchRange = 2 * (1 << static_cast<int>(floor(log2(segmentCount))));
+
+    head = writeU16(segmentCount * 2, out.data(), head);  // segCountX2
+    head = writeU16(searchRange, out.data(), head);  // searchRange
+    head = writeU16(__builtin_ctz(searchRange) - 1, out.data(), head); // entrySelector
+    head = writeU16(segmentCount * 2 - searchRange, out.data(), head);  // rangeShift
+    // Four parallel per-segment arrays follow the header; each cursor below walks one array.
+    size_t endCountHead = head;
+    size_t startCountHead = head + segmentCount * sizeof(uint16_t) + 2 /* padding */;
+    size_t idDeltaHead = startCountHead + segmentCount * sizeof(uint16_t);
+    size_t idRangeOffsetHead = idDeltaHead + segmentCount * sizeof(uint16_t);
+
+    for (size_t i = 0; i < ranges.size() / 2; ++i) {
+        const uint16_t begin = ranges[i * 2];
+        const uint16_t end = ranges[i * 2 + 1];
+        startCountHead = writeU16(begin, out.data(), startCountHead);
+        endCountHead = writeU16(end, out.data(), endCountHead);
+        // An idDelta of 0 maps each code point to the glyph ID of the same value.
+        idDeltaHead = writeU16(0, out.data(), idDeltaHead);
+        idRangeOffsetHead = writeU16(0 /* we don't use this */, out.data(), idRangeOffsetHead);
+    }
+
+    // Fill the end marker segment (0xFFFF..0xFFFF).
+    endCountHead = writeU16(0xFFFF, out.data(), endCountHead);
+    startCountHead = writeU16(0xFFFF, out.data(), startCountHead);
+    idDeltaHead = writeU16(1, out.data(), idDeltaHead);
+    idRangeOffsetHead = writeU16(0, out.data(), idRangeOffsetHead);
+    LOG_ALWAYS_FATAL_IF(endCountHead > finalLength);
+    LOG_ALWAYS_FATAL_IF(startCountHead > finalLength);
+    LOG_ALWAYS_FATAL_IF(idDeltaHead > finalLength);
+    LOG_ALWAYS_FATAL_IF(idRangeOffsetHead != finalLength);
+    return out;
+}
+
+// Returns valid cmap format 12 table contents. Every group's start glyph ID has the same value
+// as its start code point (e.g. 'a' (U+0061) is mapped to glyph ID 0x0061).
+// 'ranges' is a flat list of (start, end) pairs; both ends are inclusive.
+static std::vector<uint8_t> buildCmapFormat12Table(const std::vector<uint32_t>& ranges) {
+    uint32_t numGroups  = ranges.size() / 2;
+
+    const size_t finalLength = 2 /* format */ + 2 /* reserved */ + 4 /* length */ +
+        4 /* languages */ + 4 /* numGroups */ + 12 /* size of a group */ * numGroups;
+
+    std::vector<uint8_t> out(finalLength);
+    size_t head = 0;
+    head = writeU16(12, out.data(), head);  // format
+    head = writeU16(0, out.data(), head);  // reserved
+    head = writeU32(finalLength, out.data(), head);  // length
+    head = writeU32(0, out.data(), head);  // language
+    head = writeU32(numGroups, out.data(), head);  // numGroups
+
+    for (uint32_t i = 0; i < numGroups; ++i) {
+        const uint32_t start = ranges[2 * i];
+        const uint32_t end = ranges[2 * i + 1];
+        head = writeU32(start, out.data(), head);
+        head = writeU32(end, out.data(), head);
+        // map glyph ID as the same value of the code point.
+        // TODO: Use glyph IDs lower than 65535.
+        // Cmap can store 32 bit glyph ID but due to the size of numGlyph, a font file can contain
+        // up to 65535 glyphs in a file.
+        head = writeU32(start, out.data(), head);
+    }
+
+    LOG_ALWAYS_FATAL_IF(head != finalLength);
+    return out;
+}
+
+struct VariationSelectorRecord {  // Test input for one variation selector of a format 14 table.
+    uint32_t codePoint;  // Written as the record's variation selector code point.
+    std::vector<uint32_t> defaultUVSRanges;  // Flat (start, end) pairs; both ends inclusive.
+    std::vector<uint32_t> nonDefaultUVS;  // Code points; each is mapped to a fixed glyph ID.
+    // Serializes defaultUVSRanges as a Default UVS table; empty when there are no ranges.
+    std::vector<uint8_t> getDefaultUVSAsBinary() const {
+        if (defaultUVSRanges.empty()) {
+            return std::vector<uint8_t>();
+        }
+        const size_t numOfRanges = defaultUVSRanges.size() / 2;
+        const size_t length = sizeof(uint32_t) /* numUnicodeValueRanges */ +
+            numOfRanges * 4 /* size of Unicode Range Table */;
+
+        std::vector<uint8_t> out(length);
+        size_t head = 0;
+        head = writeU32(numOfRanges, out.data(), head);
+        for (size_t i = 0; i < numOfRanges; ++i) {
+            const uint32_t startUnicodeValue = defaultUVSRanges[i * 2];
+            const uint32_t endUnicodeValue = defaultUVSRanges[i * 2 + 1];
+            head = writeU24(startUnicodeValue, out.data(), head);
+            head = writeU8(endUnicodeValue - startUnicodeValue, out.data(), head);
+        }
+        LOG_ALWAYS_FATAL_IF(head != length);
+        return out;
+    }
+    // Serializes nonDefaultUVS as a Non-Default UVS table; empty when there are no mappings.
+    std::vector<uint8_t> getNonDefaultUVSAsBinary() const {
+        if (nonDefaultUVS.empty()) {
+            return std::vector<uint8_t>();
+        }
+        const size_t length = sizeof(uint32_t) /* numUVSMappings */ +
+            nonDefaultUVS.size() * 5 /* size of UVS Mapping Record */;
+
+        std::vector<uint8_t> out(length);
+        size_t head = 0;
+        head = writeU32(nonDefaultUVS.size(), out.data(), head);
+        for (uint32_t codePoint : nonDefaultUVS) {
+            head = writeU24(codePoint, out.data(), head);
+            head = writeU16(4 /* fixed glyph id */, out.data(), head);
+        }
+        LOG_ALWAYS_FATAL_IF(head != length);
+        return out;
+    }
+};
+
+static std::vector<uint8_t> buildCmapFormat14Table(
+        const std::vector<VariationSelectorRecord>& vsRecords) {
+    // Builds a cmap format 14 subtable: header and records first, UVS tables appended after.
+    const size_t headerLength = sizeof(uint16_t) /* format */ + sizeof(uint32_t) /* length */ +
+            sizeof(uint32_t) /* numVarSelectorRecords */ +
+            11 /* size of variation selector record */ * vsRecords.size();
+
+    std::vector<uint8_t> out(headerLength);
+    size_t head = 0;
+    head = writeU16(14, out.data(), head);  // format
+    head += sizeof(uint32_t);  // length will be filled later
+    head = writeU32(vsRecords.size(), out.data(), head);  // numVarSelectorRecords;
+
+    for (const auto& record : vsRecords) {
+        const uint32_t vsCodePoint = record.codePoint;
+        head = writeU24(vsCodePoint, out.data(), head);
+
+        std::vector<uint8_t> defaultUVS = record.getDefaultUVSAsBinary();
+        if (defaultUVS.empty()) {
+            head = writeU32(0, out.data(), head);  // Offset 0: no Default UVS table.
+        } else {
+            head = writeU32(out.size(), out.data(), head);  // Offset from the subtable start.
+            out.insert(out.end(), defaultUVS.begin(), defaultUVS.end());
+        }
+
+        std::vector<uint8_t> nonDefaultUVS = record.getNonDefaultUVSAsBinary();
+        if (nonDefaultUVS.empty()) {
+            head = writeU32(0, out.data(), head);  // Offset 0: no Non-Default UVS table.
+        } else {
+            head = writeU32(out.size(), out.data(), head);  // Offset from the subtable start.
+            out.insert(out.end(), nonDefaultUVS.begin(), nonDefaultUVS.end());
+        }
+    }
+    LOG_ALWAYS_FATAL_IF(head != headerLength);
+    writeU32(out.size(), out.data(), 2);  // fill the length.
+    return out;
+}
+
+class CmapBuilder {  // Assembles a cmap table: header, encoding records, then subtables.
+public:
+    static constexpr size_t kEncodingTableHead = 4;  // Byte offset of the first encoding record.
+    static constexpr size_t kEncodingTableSize = 8;  // Bytes per encoding record.
+    // Reserves the header and |numTables| encoding records; build() requires that many tables.
+    CmapBuilder(int numTables) : mNumTables(numTables), mCurrentTableIndex(0) {
+        const size_t headerSize =
+            2 /* version */ + 2 /* numTables */ + kEncodingTableSize * numTables;
+        out.resize(headerSize);
+        writeU16(0, out.data(), 0);
+        writeU16(numTables, out.data(), 2);
+    }
+    // Fills the next encoding record and appends |table| at the current end of the buffer.
+    void appendTable(uint16_t platformId, uint16_t encodingId,
+            const std::vector<uint8_t>& table) {
+        appendEncodingTable(platformId, encodingId, out.size());
+        out.insert(out.end(), table.begin(), table.end());
+    }
+    // Returns a copy of the cmap bytes; aborts unless exactly |numTables| tables were appended.
+    std::vector<uint8_t> build() {
+        LOG_ALWAYS_FATAL_IF(mCurrentTableIndex != mNumTables);
+        return out;
+    }
+
+    // Helper functions.
+    static std::vector<uint8_t> buildSingleFormat4Cmap(uint16_t platformId, uint16_t encodingId,
+            const std::vector<uint16_t>& ranges) {
+        CmapBuilder builder(1);
+        builder.appendTable(platformId, encodingId, buildCmapFormat4Table(ranges));
+        return builder.build();
+    }
+
+    static std::vector<uint8_t> buildSingleFormat12Cmap(uint16_t platformId, uint16_t encodingId,
+            const std::vector<uint32_t>& ranges) {
+        CmapBuilder builder(1);
+        builder.appendTable(platformId, encodingId, buildCmapFormat12Table(ranges));
+        return builder.build();
+    }
+
+private:
+    void appendEncodingTable(uint16_t platformId, uint16_t encodingId, uint32_t offset) {
+        LOG_ALWAYS_FATAL_IF(mCurrentTableIndex == mNumTables);
+        // Record layout: platformId (2 bytes), encodingId (2 bytes), subtable offset (4 bytes).
+        const size_t currentEncodingTableHead =
+                kEncodingTableHead + mCurrentTableIndex * kEncodingTableSize;
+        size_t head = writeU16(platformId, out.data(), currentEncodingTableHead);
+        head = writeU16(encodingId, out.data(), head);
+        head = writeU32(offset, out.data(), head);
+        LOG_ALWAYS_FATAL_IF((head - currentEncodingTableHead) != kEncodingTableSize);
+        mCurrentTableIndex++;
+    }
+
+    int mNumTables;  // Number of encoding records reserved by the constructor.
+    int mCurrentTableIndex;  // Number of tables appended so far.
+    std::vector<uint8_t> out;  // The cmap bytes built so far.
+};
+
+TEST(CmapCoverageTest, SingleFormat4_brokenCmap) {  // Broken input must give empty coverage.
+    std::vector<std::unique_ptr<SparseBitSet>> vsTables;
+    {
+        SCOPED_TRACE("Reading beyond buffer size - Too small cmap size");
+        std::vector<uint8_t> cmap =
+                CmapBuilder::buildSingleFormat4Cmap(0, 0, std::vector<uint16_t>({'a', 'a'}));
+
+        SparseBitSet coverage =
+                CmapCoverage::getCoverage(cmap.data(), 3 /* too small */, &vsTables);
+        EXPECT_EQ(0U, coverage.length());
+        EXPECT_TRUE(vsTables.empty());
+    }
+    {
+        SCOPED_TRACE("Reading beyond buffer size - space needed for tables goes beyond cmap size");
+        std::vector<uint8_t> cmap =
+                CmapBuilder::buildSingleFormat4Cmap(0, 0, std::vector<uint16_t>({'a', 'a'}));
+        // Claims 1000 encoding records although the buffer holds only one.
+        writeU16(1000, cmap.data(), 2 /* offset of num tables in cmap header */);
+        SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+        EXPECT_EQ(0U, coverage.length());
+        EXPECT_TRUE(vsTables.empty());
+    }
+    {
+        SCOPED_TRACE("Reading beyond buffer size - Invalid offset in encoding table");
+        std::vector<uint8_t> cmap =
+                CmapBuilder::buildSingleFormat4Cmap(0, 0, std::vector<uint16_t>({'a', 'a'}));
+        // Overwrites the upper 16 bits of the 32-bit subtable offset stored at bytes 8-11.
+        writeU16(1000, cmap.data(), 8 /* offset of the offset in the first encoding record */);
+        SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+        EXPECT_EQ(0U, coverage.length());
+        EXPECT_TRUE(vsTables.empty());
+    }
+    {
+        SCOPED_TRACE("Reversed range");
+        std::vector<uint8_t> cmap = CmapBuilder::buildSingleFormat4Cmap(0, 0, std::vector<uint16_t>(
+                {'b', 'b', 'a', 'a'}));
+        // Segments are out of order ('b' before 'a'), so no coverage is expected.
+        SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+        EXPECT_EQ(0U, coverage.length());
+        EXPECT_TRUE(vsTables.empty());
+    }
+    {
+        SCOPED_TRACE("Reversed range - partially readable");
+        std::vector<uint8_t> cmap = CmapBuilder::buildSingleFormat4Cmap(0, 0, std::vector<uint16_t>(
+                { 'a', 'a', 'c', 'c', 'b', 'b'}));
+        // Even the leading in-order segment ('a') must not appear in the coverage.
+        SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+        EXPECT_EQ(0U, coverage.length());
+        EXPECT_TRUE(vsTables.empty());
+    }
+}
+
+TEST(CmapCoverageTest, SingleFormat4) {  // A lone format 4 subtable yields its coverage.
+    std::vector<std::unique_ptr<SparseBitSet>> vsTables;
+    struct TestCast {
+        std::string testTitle;
+        uint16_t platformId;
+        uint16_t encodingId;
+    } TEST_CASES[] = {
+        { "Platform 0, Encoding 0", 0, 0 },
+        { "Platform 0, Encoding 1", 0, 1 },
+        { "Platform 0, Encoding 2", 0, 2 },
+        { "Platform 0, Encoding 3", 0, 3 },
+        { "Platform 3, Encoding 1", 3, 1 },
+    };
+    // Each case builds a cmap covering only 'a' under the given platform/encoding pair.
+    for (const auto& testCase : TEST_CASES) {
+        SCOPED_TRACE(testCase.testTitle.c_str());
+        std::vector<uint8_t> cmap = CmapBuilder::buildSingleFormat4Cmap(
+                testCase.platformId, testCase.encodingId, std::vector<uint16_t>({'a', 'a'}));
+        SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+        EXPECT_TRUE(coverage.get('a'));
+        EXPECT_FALSE(coverage.get('b'));
+        EXPECT_TRUE(vsTables.empty());
+    }
+}
+
+TEST(CmapCoverageTest, SingleFormat12) {  // Coverage is read from a format 12 cmap for each listed id pair.
+    std::vector<std::unique_ptr<SparseBitSet>> vsTables;
+
+    struct TestCast {  // One (platform, encoding) pair plus the label used in failure traces.
+        std::string testTitle;
+        uint16_t platformId;
+        uint16_t encodingId;
+    } TEST_CASES[] = {
+        { "Platform 0, Encoding 4", 0, 4 },
+        { "Platform 0, Encoding 6", 0, 6 },
+        { "Platform 3, Encoding 10", 3, 10 },
+    };
+
+    for (const auto& testCase : TEST_CASES) {
+        SCOPED_TRACE(testCase.testTitle.c_str());
+        std::vector<uint8_t> cmap = CmapBuilder::buildSingleFormat12Cmap(
+                testCase.platformId, testCase.encodingId, std::vector<uint32_t>({'a', 'a'}));
+        SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+        EXPECT_TRUE(coverage.get('a'));  // The only code point put into the table.
+        EXPECT_FALSE(coverage.get('b'));  // Nothing outside the table may be reported.
+        EXPECT_TRUE(vsTables.empty());  // No variation selector coverage is expected.
+    }
+}
+
+TEST(CmapCoverageTest, Format12_beyondTheUnicodeLimit) {  // Code points above U+10FFFF never show up in coverage.
+    std::vector<std::unique_ptr<SparseBitSet>> vsTables;
+    {
+        SCOPED_TRACE("Starting range is out of Unicode code point. Should be ignored.");
+        std::vector<uint8_t> cmap = CmapBuilder::buildSingleFormat12Cmap(
+                0, 0, std::vector<uint32_t>({'a', 'a', 0x110000, 0x110000}));
+
+        SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+        EXPECT_TRUE(coverage.get('a'));  // The valid group is still honored.
+        EXPECT_FALSE(coverage.get(0x110000));
+        EXPECT_TRUE(vsTables.empty());
+    }
+    {
+        SCOPED_TRACE("Ending range is out of Unicode code point. Should be ignored.");
+        std::vector<uint8_t> cmap = CmapBuilder::buildSingleFormat12Cmap(
+                0, 0, std::vector<uint32_t>({'a', 'a', 0x10FF00, 0x110000}));
+
+        SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+        EXPECT_TRUE(coverage.get('a'));
+        EXPECT_TRUE(coverage.get(0x10FF00));  // The part of the group up to U+10FFFF stays covered...
+        EXPECT_TRUE(coverage.get(0x10FFFF));
+        EXPECT_FALSE(coverage.get(0x110000));  // ...and only the out-of-Unicode end is dropped.
+        EXPECT_TRUE(vsTables.empty());
+    }
+}
+
+TEST(CmapCoverageTest, notSupportedEncodings) {  // Listed (platform, encoding) pairs must yield empty coverage.
+    std::vector<std::unique_ptr<SparseBitSet>> vsTables;
+
+    struct TestCast {  // One (platform, encoding) pair plus the label used in failure traces.
+        std::string testTitle;
+        uint16_t platformId;
+        uint16_t encodingId;
+    } TEST_CASES[] = {
+        // No encoding on platform 2 is supported.
+        { "Platform 2, Encoding 0", 2, 0 },
+        { "Platform 2, Encoding 1", 2, 1 },
+        { "Platform 2, Encoding 2", 2, 2 },
+        { "Platform 2, Encoding 3", 2, 3 },
+        // Only UCS-2 and UCS-4 are supported on platform 3. The others are not.
+        { "Platform 3, Encoding 0", 3, 0 },  // Symbol
+        { "Platform 3, Encoding 2", 3, 2 },  // ShiftJIS
+        { "Platform 3, Encoding 3", 3, 3 },  // PRC
+        { "Platform 3, Encoding 4", 3, 4 },  // Big5
+        { "Platform 3, Encoding 5", 3, 5 },  // Wansung
+        { "Platform 3, Encoding 6", 3, 6 },  // Johab
+        { "Platform 3, Encoding 7", 3, 7 },  // Reserved
+        { "Platform 3, Encoding 8", 3, 8 },  // Reserved
+        { "Platform 3, Encoding 9", 3, 9 },  // Reserved
+        // Unknown platforms
+        { "Platform 4, Encoding 0", 4, 0 },
+        { "Platform 5, Encoding 1", 5, 1 },
+        { "Platform 6, Encoding 0", 6, 0 },
+        { "Platform 7, Encoding 1", 7, 1 },
+    };
+
+    for (const auto& testCase : TEST_CASES) {
+        SCOPED_TRACE(testCase.testTitle.c_str());
+        CmapBuilder builder(1);  // NOTE(review): never used below; the static helper builds the cmap.
+        std::vector<uint8_t> cmap = CmapBuilder::buildSingleFormat4Cmap(
+                testCase.platformId, testCase.encodingId, std::vector<uint16_t>({'a', 'a'}));
+        SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+        EXPECT_EQ(0U, coverage.length());  // Even 'a' from the table must not be picked up.
+        EXPECT_TRUE(vsTables.empty());
+    }
+}
+
+TEST(CmapCoverageTest, brokenFormat4Table) {  // Corrupted format 4 subtables must yield empty coverage.
+    std::vector<std::unique_ptr<SparseBitSet>> vsTables;
+    {
+        SCOPED_TRACE("Too small table cmap size");
+        std::vector<uint8_t> table = buildCmapFormat4Table(std::vector<uint16_t>({'a', 'a'}));
+        table.resize(2);  // Remove trailing data.
+
+        CmapBuilder builder(1);
+        builder.appendTable(0, 0, table);
+        std::vector<uint8_t> cmap = builder.build();
+
+        SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+        EXPECT_EQ(0U, coverage.length());
+        EXPECT_TRUE(vsTables.empty());
+    }
+    {
+        SCOPED_TRACE("Too many segments");
+        std::vector<uint8_t> table = buildCmapFormat4Table(std::vector<uint16_t>({'a', 'a'}));
+        writeU16(5000, table.data(), 6 /* segment count offset */);  // 5000 segments.
+        CmapBuilder builder(1);
+        builder.appendTable(0, 0, table);
+        std::vector<uint8_t> cmap = builder.build();
+
+        SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+        EXPECT_EQ(0U, coverage.length());
+        EXPECT_TRUE(vsTables.empty());
+    }
+    {
+        SCOPED_TRACE("Inversed range");
+        std::vector<uint8_t> table = buildCmapFormat4Table(std::vector<uint16_t>({'b', 'b'}));
+        // Write an end code point smaller than the start to invert the range.
+        writeU16('a', table.data(), 14 /* the first element of endCount offset */);
+        CmapBuilder builder(1);
+        builder.appendTable(0, 0, table);
+        std::vector<uint8_t> cmap = builder.build();
+
+        SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+        EXPECT_EQ(0U, coverage.length());
+        EXPECT_TRUE(vsTables.empty());
+    }
+}
+
+TEST(CmapCoverageTest, brokenFormat12Table) {  // Corrupted format 12 subtables must yield empty coverage.
+    std::vector<std::unique_ptr<SparseBitSet>> vsTables;
+    {
+        SCOPED_TRACE("Too small cmap size");
+        std::vector<uint8_t> table = buildCmapFormat12Table(std::vector<uint32_t>({'a', 'a'}));
+        table.resize(2);  // Remove trailing data.
+
+        CmapBuilder builder(1);
+        builder.appendTable(0, 0, table);
+        std::vector<uint8_t> cmap = builder.build();
+
+        SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+        EXPECT_EQ(0U, coverage.length());
+        EXPECT_TRUE(vsTables.empty());
+    }
+    {
+        SCOPED_TRACE("Too many groups");
+        std::vector<uint8_t> table = buildCmapFormat12Table(std::vector<uint32_t>({'a', 'a'}));
+        writeU32(5000, table.data(), 12 /* num group offset */);  // 5000 groups.
+
+        CmapBuilder builder(1);
+        builder.appendTable(0, 0, table);
+        std::vector<uint8_t> cmap = builder.build();
+
+        SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+        EXPECT_EQ(0U, coverage.length());
+        EXPECT_TRUE(vsTables.empty());
+    }
+    {
+        SCOPED_TRACE("Inversed range.");
+        std::vector<uint8_t> table = buildCmapFormat12Table(std::vector<uint32_t>({'a', 'a'}));
+        // Write a start code point larger than the end to invert the range.
+        writeU32('b', table.data(), 16 /* start code point offset in the first  group */);
+
+        CmapBuilder builder(1);
+        builder.appendTable(0, 0, table);
+        std::vector<uint8_t> cmap = builder.build();
+
+        SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+        EXPECT_EQ(0U, coverage.length());
+        EXPECT_TRUE(vsTables.empty());
+    }
+    {
+        SCOPED_TRACE("Too large code point");
+        std::vector<uint8_t> cmap = CmapBuilder::buildSingleFormat12Cmap(
+                0, 0, std::vector<uint32_t>({0x110000, 0x110000}));
+
+        SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+        EXPECT_EQ(0U, coverage.length());
+        EXPECT_TRUE(vsTables.empty());
+    }
+    {
+        SCOPED_TRACE("Reversed range");
+        std::vector<uint8_t> cmap = CmapBuilder::buildSingleFormat12Cmap(
+                0, 0, std::vector<uint32_t>({'b', 'b', 'a', 'a'}));  // 'b' group precedes 'a' group.
+        SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+        EXPECT_EQ(0U, coverage.length());
+        EXPECT_TRUE(vsTables.empty());
+    }
+    {
+        SCOPED_TRACE("Reversed range - partially readable");
+        std::vector<uint8_t> cmap = CmapBuilder::buildSingleFormat12Cmap(
+                0, 0, std::vector<uint32_t>({'a', 'a', 'c', 'c', 'b', 'b'}));  // Only the last group is out of order.
+        SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+        EXPECT_EQ(0U, coverage.length());  // Nothing is kept, not even the leading sorted groups.
+        EXPECT_TRUE(vsTables.empty());
+    }
+}
+
+TEST(CmapCoverageTest, TableSelection_Priority) {  // With two subtables present, the higher-priority one is used.
+    std::vector<uint8_t> highestFormat12Table =
+            buildCmapFormat12Table(std::vector<uint32_t>({'a', 'a'}));
+    std::vector<uint8_t> highestFormat4Table =
+            buildCmapFormat4Table(std::vector<uint16_t>({'a', 'a'}));
+    std::vector<uint8_t> format4 = buildCmapFormat4Table(std::vector<uint16_t>({'b', 'b'}));
+    std::vector<uint8_t> format12 = buildCmapFormat12Table(std::vector<uint32_t>({'b', 'b'}));
+
+    std::vector<std::unique_ptr<SparseBitSet>> vsTables;
+    {
+        SCOPED_TRACE("(platform, encoding) = (3, 10) is the highest priority.");
+
+        struct LowerPriorityTable {
+            uint16_t platformId;
+            uint16_t encodingId;
+            const std::vector<uint8_t>& table;  // Refers to format4 or format12 declared above.
+        } LOWER_PRIORITY_TABLES[] = {
+            { 0, 0, format4 },
+            { 0, 1, format4 },
+            { 0, 2, format4 },
+            { 0, 3, format4 },
+            { 0, 4, format12 },
+            { 0, 6, format12 },
+            { 3, 1, format4 },
+        };
+
+        for (const auto& table : LOWER_PRIORITY_TABLES) {
+            CmapBuilder builder(2);
+            builder.appendTable(table.platformId, table.encodingId, table.table);
+            builder.appendTable(3, 10, highestFormat12Table);
+            std::vector<uint8_t> cmap = builder.build();
+
+            SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+            EXPECT_TRUE(coverage.get('a'));  // comes from highest table
+            EXPECT_FALSE(coverage.get('b'));  // should not use other table.
+            EXPECT_TRUE(vsTables.empty());
+        }
+    }
+    {
+        SCOPED_TRACE("(platform, encoding) = (3, 1) case");
+
+        struct LowerPriorityTable {
+            uint16_t platformId;
+            uint16_t encodingId;
+            const std::vector<uint8_t>& table;  // Refers to format4 declared above.
+        } LOWER_PRIORITY_TABLES[] = {
+            { 0, 0, format4 },
+            { 0, 1, format4 },
+            { 0, 2, format4 },
+            { 0, 3, format4 },
+        };
+
+        for (const auto& table : LOWER_PRIORITY_TABLES) {
+            CmapBuilder builder(2);
+            builder.appendTable(table.platformId, table.encodingId, table.table);
+            builder.appendTable(3, 1, highestFormat4Table);
+            std::vector<uint8_t> cmap = builder.build();
+
+            SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+            EXPECT_TRUE(coverage.get('a'));  // comes from highest table
+            EXPECT_FALSE(coverage.get('b'));  // should not use other table.
+            EXPECT_TRUE(vsTables.empty());
+        }
+    }
+}
+
+TEST(CmapCoverageTest, TableSelection_SkipBrokenFormat4Table) {  // A bad (3, 1) table is skipped for the valid (0, 0) one.
+    std::vector<uint8_t> validTable = buildCmapFormat4Table(std::vector<uint16_t>({'a', 'a'}));
+    std::vector<std::unique_ptr<SparseBitSet>> vsTables;
+    {
+        SCOPED_TRACE("Unsupported format");
+        CmapBuilder builder(2);
+        std::vector<uint8_t> table = buildCmapFormat4Table(std::vector<uint16_t>({'b', 'b'}));
+        writeU16(0, table.data(), 0 /* format offset */);  // Overwrite the format field with 0.
+        builder.appendTable(3, 1, table);
+        builder.appendTable(0, 0, validTable);
+        std::vector<uint8_t> cmap = builder.build();
+
+        SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+        EXPECT_TRUE(coverage.get('a'));  // comes from valid table
+        EXPECT_FALSE(coverage.get('b'));  // should not use invalid table.
+        EXPECT_TRUE(vsTables.empty());
+    }
+    {
+        SCOPED_TRACE("Invalid language");
+        CmapBuilder builder(2);
+        std::vector<uint8_t> table = buildCmapFormat4Table(std::vector<uint16_t>({'b', 'b'}));
+        writeU16(1, table.data(), 4 /* language offset */);  // Overwrite the language field with 1.
+        builder.appendTable(3, 1, table);
+        builder.appendTable(0, 0, validTable);
+        std::vector<uint8_t> cmap = builder.build();
+
+        SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+        EXPECT_TRUE(coverage.get('a'));  // comes from valid table
+        EXPECT_FALSE(coverage.get('b'));  // should not use invalid table.
+        EXPECT_TRUE(vsTables.empty());
+    }
+    {
+        SCOPED_TRACE("Invalid length");
+        CmapBuilder builder(2);
+        std::vector<uint8_t> table = buildCmapFormat4Table(std::vector<uint16_t>({'b', 'b'}));
+        writeU16(5000, table.data(), 2 /* length offset */);  // Overwrite the length field with 5000.
+        builder.appendTable(3, 1, table);
+        builder.appendTable(0, 0, validTable);
+        std::vector<uint8_t> cmap = builder.build();
+
+        SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+        EXPECT_TRUE(coverage.get('a'));  // comes from valid table
+        EXPECT_FALSE(coverage.get('b'));  // should not use invalid table.
+        EXPECT_TRUE(vsTables.empty());
+    }
+}
+
+TEST(CmapCoverageTest, TableSelection_SkipBrokenFormat12Table) {  // A bad (3, 1) table is skipped for the valid (0, 0) one.
+    std::vector<std::unique_ptr<SparseBitSet>> vsTables;
+    std::vector<uint8_t> validTable =
+            buildCmapFormat12Table(std::vector<uint32_t>({'a', 'a'}));
+    {
+        SCOPED_TRACE("Unsupported format");
+        CmapBuilder builder(2);
+        std::vector<uint8_t> table = buildCmapFormat12Table(std::vector<uint32_t>({'b', 'b'}));
+        writeU16(0, table.data(), 0 /* format offset */);  // Overwrite the format field with 0.
+        builder.appendTable(3, 1, table);
+        builder.appendTable(0, 0, validTable);
+        std::vector<uint8_t> cmap = builder.build();
+
+        SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+        EXPECT_TRUE(coverage.get('a'));  // comes from valid table
+        EXPECT_FALSE(coverage.get('b'));  // should not use invalid table.
+        EXPECT_TRUE(vsTables.empty());
+    }
+    {
+        SCOPED_TRACE("Invalid language");
+        CmapBuilder builder(2);
+        std::vector<uint8_t> table = buildCmapFormat12Table(std::vector<uint32_t>({'b', 'b'}));
+        writeU32(1, table.data(), 8 /* language offset */);  // Overwrite the language field with 1.
+        builder.appendTable(3, 1, table);
+        builder.appendTable(0, 0, validTable);
+        std::vector<uint8_t> cmap = builder.build();
+
+        SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+        EXPECT_TRUE(coverage.get('a'));  // comes from valid table
+        EXPECT_FALSE(coverage.get('b'));  // should not use invalid table.
+        EXPECT_TRUE(vsTables.empty());
+    }
+    {
+        SCOPED_TRACE("Invalid length");
+        CmapBuilder builder(2);
+        std::vector<uint8_t> table = buildCmapFormat12Table(std::vector<uint32_t>({'b', 'b'}));
+        writeU32(5000, table.data(), 4 /* length offset */);  // Overwrite the length field with 5000.
+        builder.appendTable(3, 1, table);
+        builder.appendTable(0, 0, validTable);
+        std::vector<uint8_t> cmap = builder.build();
+
+        SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+        EXPECT_TRUE(coverage.get('a'));  // comes from valid table
+        EXPECT_FALSE(coverage.get('b'));  // should not use invalid table.
+        EXPECT_TRUE(vsTables.empty());
+    }
+}
+
+TEST(CmapCoverageTest, TableSelection_VSTable) {  // A format 14 table fills vsTables for each listed selector.
+    std::vector<uint8_t> smallLetterTable =
+            buildCmapFormat12Table(std::vector<uint32_t>({'a', 'z'}));
+    std::vector<uint8_t> vsTable = buildCmapFormat14Table(std::vector<VariationSelectorRecord>({
+        { 0xFE0E, { 'a', 'b' }, {} /* no non-default UVS table */ },
+        { 0xFE0F, {} /* no default UVS table */, { 'a', 'b'} },
+        { 0xE0100, { 'a', 'a' }, { 'b'} },
+    }));
+    CmapBuilder builder(2);
+    builder.appendTable(3, 1, smallLetterTable);
+    builder.appendTable(VS_PLATFORM_ID, VS_ENCODING_ID, vsTable);
+    std::vector<uint8_t> cmap = builder.build();
+
+    std::vector<std::unique_ptr<SparseBitSet>> vsTables;
+    SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+    EXPECT_TRUE(coverage.get('a'));
+    ASSERT_FALSE(vsTables.empty());
+
+    const uint16_t vs15Index = getVsIndex(0xFE0E);
+    ASSERT_LT(vs15Index, vsTables.size());  // Abort before indexing out of bounds.
+    ASSERT_TRUE(vsTables[vs15Index]);  // Abort before dereferencing a null entry.
+    EXPECT_TRUE(vsTables[vs15Index]->get('a'));
+    EXPECT_TRUE(vsTables[vs15Index]->get('b'));
+
+    const uint16_t vs16Index = getVsIndex(0xFE0F);
+    ASSERT_LT(vs16Index, vsTables.size());
+    ASSERT_TRUE(vsTables[vs16Index]);
+    EXPECT_TRUE(vsTables[vs16Index]->get('a'));
+    EXPECT_TRUE(vsTables[vs16Index]->get('b'));
+
+    const uint16_t vs17Index = getVsIndex(0xE0100);
+    ASSERT_LT(vs17Index, vsTables.size());
+    ASSERT_TRUE(vsTables[vs17Index]);
+    EXPECT_TRUE(vsTables[vs17Index]->get('a'));  // Listed in the default part of the record.
+    EXPECT_TRUE(vsTables[vs17Index]->get('b'));  // Listed in the non-default part of the record.
+}
+
+TEST(CmapCoverageTest, TableSelection_InterSection) {  // Overlapping default and non-default entries are merged.
+    std::vector<uint8_t> smallLetterTable =
+            buildCmapFormat12Table(std::vector<uint32_t>({'a', 'z'}));
+    std::vector<uint8_t> vsTable = buildCmapFormat14Table(std::vector<VariationSelectorRecord>({
+        { 0xFE0E, { 'a', 'e' }, { 'c', 'd', } },
+        { 0xFE0F, { 'c', 'e'} , { 'a', 'b', 'c', 'd', 'e'} },
+        { 0xE0100, { 'a', 'c' }, { 'b', 'c', 'd' } },
+        { 0xE0101, { 'b', 'd'} , { 'a', 'b', 'c', 'd'} },
+        { 0xE0102, { 'a', 'c', 'd', 'g'} , { 'b', 'c', 'd', 'e', 'f', 'g', 'h'} },
+        { 0xE0103, { 'a', 'f'} , { 'b', 'd', } },
+    }));
+    CmapBuilder builder(2);
+    builder.appendTable(3, 1, smallLetterTable);
+    builder.appendTable(VS_PLATFORM_ID, VS_ENCODING_ID, vsTable);
+    std::vector<uint8_t> cmap = builder.build();
+
+    std::vector<std::unique_ptr<SparseBitSet>> vsTables;
+    SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+    EXPECT_TRUE(coverage.get('a'));
+    ASSERT_FALSE(vsTables.empty());
+
+    const uint16_t vs15Index = getVsIndex(0xFE0E);  // Expect 'a'..'e'.
+    ASSERT_LT(vs15Index, vsTables.size());
+    ASSERT_TRUE(vsTables[vs15Index]);
+    EXPECT_TRUE(vsTables[vs15Index]->get('a'));
+    EXPECT_TRUE(vsTables[vs15Index]->get('b'));
+    EXPECT_TRUE(vsTables[vs15Index]->get('c'));
+    EXPECT_TRUE(vsTables[vs15Index]->get('d'));
+    EXPECT_TRUE(vsTables[vs15Index]->get('e'));
+
+    const uint16_t vs16Index = getVsIndex(0xFE0F);  // Expect 'a'..'e'.
+    ASSERT_LT(vs16Index, vsTables.size());
+    ASSERT_TRUE(vsTables[vs16Index]);
+    EXPECT_TRUE(vsTables[vs16Index]->get('a'));
+    EXPECT_TRUE(vsTables[vs16Index]->get('b'));
+    EXPECT_TRUE(vsTables[vs16Index]->get('c'));
+    EXPECT_TRUE(vsTables[vs16Index]->get('d'));
+    EXPECT_TRUE(vsTables[vs16Index]->get('e'));
+
+    const uint16_t vs17Index = getVsIndex(0xE0100);  // Expect 'a'..'d'.
+    ASSERT_LT(vs17Index, vsTables.size());
+    ASSERT_TRUE(vsTables[vs17Index]);
+    EXPECT_TRUE(vsTables[vs17Index]->get('a'));
+    EXPECT_TRUE(vsTables[vs17Index]->get('b'));
+    EXPECT_TRUE(vsTables[vs17Index]->get('c'));
+    EXPECT_TRUE(vsTables[vs17Index]->get('d'));
+
+    const uint16_t vs18Index = getVsIndex(0xE0101);  // Expect 'a'..'d'.
+    ASSERT_LT(vs18Index, vsTables.size());
+    ASSERT_TRUE(vsTables[vs18Index]);
+    EXPECT_TRUE(vsTables[vs18Index]->get('a'));
+    EXPECT_TRUE(vsTables[vs18Index]->get('b'));
+    EXPECT_TRUE(vsTables[vs18Index]->get('c'));
+    EXPECT_TRUE(vsTables[vs18Index]->get('d'));
+
+    const uint16_t vs19Index = getVsIndex(0xE0102);  // Expect 'a'..'h'.
+    ASSERT_LT(vs19Index, vsTables.size());
+    ASSERT_TRUE(vsTables[vs19Index]);
+    EXPECT_TRUE(vsTables[vs19Index]->get('a'));
+    EXPECT_TRUE(vsTables[vs19Index]->get('b'));
+    EXPECT_TRUE(vsTables[vs19Index]->get('c'));
+    EXPECT_TRUE(vsTables[vs19Index]->get('d'));
+    EXPECT_TRUE(vsTables[vs19Index]->get('e'));
+    EXPECT_TRUE(vsTables[vs19Index]->get('f'));
+    EXPECT_TRUE(vsTables[vs19Index]->get('g'));
+    EXPECT_TRUE(vsTables[vs19Index]->get('h'));
+
+    const uint16_t vs20Index = getVsIndex(0xE0103);  // Expect 'a'..'f'.
+    ASSERT_LT(vs20Index, vsTables.size());
+    ASSERT_TRUE(vsTables[vs20Index]);
+    EXPECT_TRUE(vsTables[vs20Index]->get('a'));
+    EXPECT_TRUE(vsTables[vs20Index]->get('b'));
+    EXPECT_TRUE(vsTables[vs20Index]->get('c'));
+    EXPECT_TRUE(vsTables[vs20Index]->get('d'));
+    EXPECT_TRUE(vsTables[vs20Index]->get('e'));
+    EXPECT_TRUE(vsTables[vs20Index]->get('f'));
+}
+
+TEST(CmapCoverageTest, TableSelection_brokenVSTable) {  // Corrupted format 14 tables must leave vsTables empty.
+    std::vector<uint8_t> cmap12Table = buildCmapFormat12Table(std::vector<uint32_t>({'a', 'z'}));
+    {
+        SCOPED_TRACE("Too small cmap size");
+        std::vector<uint8_t> vsTable = buildCmapFormat14Table(std::vector<VariationSelectorRecord>({
+            { 0xFE0E, { 'a', 'a' }, { 'b' } }
+        }));
+        CmapBuilder builder(2);
+        builder.appendTable(3, 1, cmap12Table);
+        builder.appendTable(VS_PLATFORM_ID, VS_ENCODING_ID, vsTable);
+        std::vector<uint8_t> cmap = builder.build();
+
+        std::vector<std::unique_ptr<SparseBitSet>> vsTables;
+        SparseBitSet coverage = CmapCoverage::getCoverage(
+                cmap.data(), 3 /* too small size */, &vsTables);
+        EXPECT_FALSE(coverage.get('a'));  // With the size cut to 3, the base table is not read either.
+        ASSERT_TRUE(vsTables.empty());
+    }
+    {
+        SCOPED_TRACE("Too many variation records");
+        std::vector<uint8_t> vsTable = buildCmapFormat14Table(std::vector<VariationSelectorRecord>({
+            { 0xFE0F, { 'a', 'a' }, { 'b' } }
+        }));
+        writeU32(5000, vsTable.data(), 6 /* numVarSelectorRecord offset */);
+        CmapBuilder builder(2);
+        builder.appendTable(3, 1, cmap12Table);
+        builder.appendTable(VS_PLATFORM_ID, VS_ENCODING_ID, vsTable);
+        std::vector<uint8_t> cmap = builder.build();
+
+        std::vector<std::unique_ptr<SparseBitSet>> vsTables;
+        SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+        ASSERT_TRUE(vsTables.empty());
+    }
+    {
+        SCOPED_TRACE("Invalid default UVS offset in variation records");
+        std::vector<uint8_t> vsTable = buildCmapFormat14Table(std::vector<VariationSelectorRecord>({
+            { 0xFE0F, { 'a', 'a' }, { 'b' } }
+        }));
+        writeU32(5000, vsTable.data(), 13 /* defaultUVSOffset offset in the first record */);
+        CmapBuilder builder(2);
+        builder.appendTable(3, 1, cmap12Table);
+        builder.appendTable(VS_PLATFORM_ID, VS_ENCODING_ID, vsTable);
+        std::vector<uint8_t> cmap = builder.build();
+
+        std::vector<std::unique_ptr<SparseBitSet>> vsTables;
+        SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+        ASSERT_TRUE(vsTables.empty());
+    }
+    {
+        SCOPED_TRACE("Invalid non default UVS offset in variation records");
+        std::vector<uint8_t> vsTable = buildCmapFormat14Table(std::vector<VariationSelectorRecord>({
+            { 0xFE0F, { 'a', 'a' }, { 'b' } }
+        }));
+        writeU32(5000, vsTable.data(), 17 /* nonDefaultUVSOffset offset in the first record */);
+        CmapBuilder builder(2);
+        builder.appendTable(3, 1, cmap12Table);
+        builder.appendTable(VS_PLATFORM_ID, VS_ENCODING_ID, vsTable);
+        std::vector<uint8_t> cmap = builder.build();
+
+        std::vector<std::unique_ptr<SparseBitSet>> vsTables;
+        SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+        ASSERT_TRUE(vsTables.empty());
+    }
+    {
+        SCOPED_TRACE("Too many ranges entry in default UVS table");
+        std::vector<uint8_t> vsTable = buildCmapFormat14Table(std::vector<VariationSelectorRecord>({
+            { 0xFE0F, { 'a', 'a' }, { 'b' } }
+        }));
+        // 21 is the offset of the numUnicodeValueRanges in the first default UVS table.
+        writeU32(5000, vsTable.data(), 21);
+        CmapBuilder builder(2);
+        builder.appendTable(3, 1, cmap12Table);
+        builder.appendTable(VS_PLATFORM_ID, VS_ENCODING_ID, vsTable);
+        std::vector<uint8_t> cmap = builder.build();
+
+        std::vector<std::unique_ptr<SparseBitSet>> vsTables;
+        SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+        ASSERT_TRUE(vsTables.empty());
+    }
+    {
+        SCOPED_TRACE("Too many ranges entry in non default UVS table");
+        std::vector<uint8_t> vsTable = buildCmapFormat14Table(std::vector<VariationSelectorRecord>({
+            { 0xFE0F, { 'a', 'a' }, { 'b' } }
+        }));
+        // 29 is the offset of the entry count in the first non-default UVS table.
+        writeU32(5000, vsTable.data(), 29);
+        CmapBuilder builder(2);
+        builder.appendTable(3, 1, cmap12Table);
+        builder.appendTable(VS_PLATFORM_ID, VS_ENCODING_ID, vsTable);
+        std::vector<uint8_t> cmap = builder.build();
+
+        std::vector<std::unique_ptr<SparseBitSet>> vsTables;
+        SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+        ASSERT_TRUE(vsTables.empty());
+    }
+    {
+        SCOPED_TRACE("Reversed range in default UVS table");
+        std::vector<uint8_t> vsTable = buildCmapFormat14Table(std::vector<VariationSelectorRecord>({
+            { 0xFE0F, { 'b', 'b', 'a', 'a' }, { } }
+        }));
+        CmapBuilder builder(2);
+        builder.appendTable(3, 1, cmap12Table);
+        builder.appendTable(VS_PLATFORM_ID, VS_ENCODING_ID, vsTable);
+        std::vector<uint8_t> cmap = builder.build();
+
+        std::vector<std::unique_ptr<SparseBitSet>> vsTables;
+        SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+        ASSERT_TRUE(vsTables.empty());
+    }
+    {
+        SCOPED_TRACE("Reversed range in default UVS table - partially readable");
+        std::vector<uint8_t> vsTable = buildCmapFormat14Table(std::vector<VariationSelectorRecord>({
+            { 0xFE0F, { 'a', 'a', 'c', 'c', 'b', 'b' }, { } }
+        }));
+        CmapBuilder builder(2);
+        builder.appendTable(3, 1, cmap12Table);
+        builder.appendTable(VS_PLATFORM_ID, VS_ENCODING_ID, vsTable);
+        std::vector<uint8_t> cmap = builder.build();
+
+        std::vector<std::unique_ptr<SparseBitSet>> vsTables;
+        SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+        ASSERT_TRUE(vsTables.empty());  // The leading sorted range is not kept either.
+    }
+    {
+        SCOPED_TRACE("Reversed mapping entries in non default UVS table");
+        std::vector<uint8_t> vsTable = buildCmapFormat14Table(std::vector<VariationSelectorRecord>({
+            { 0xFE0F, { }, { 'b', 'a' } }
+        }));
+        CmapBuilder builder(2);
+        builder.appendTable(3, 1, cmap12Table);
+        builder.appendTable(VS_PLATFORM_ID, VS_ENCODING_ID, vsTable);
+        std::vector<uint8_t> cmap = builder.build();
+
+        std::vector<std::unique_ptr<SparseBitSet>> vsTables;
+        SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+        ASSERT_TRUE(vsTables.empty());
+    }
+    {
+        SCOPED_TRACE("Reversed mapping entries in non default UVS table");  // NOTE(review): label duplicates the previous case; this is the partially sorted variant.
+        std::vector<uint8_t> vsTable = buildCmapFormat14Table(std::vector<VariationSelectorRecord>({
+            { 0xFE0F, { }, { 'a', 'c', 'b' } }
+        }));
+        CmapBuilder builder(2);
+        builder.appendTable(3, 1, cmap12Table);
+        builder.appendTable(VS_PLATFORM_ID, VS_ENCODING_ID, vsTable);
+        std::vector<uint8_t> cmap = builder.build();
+
+        std::vector<std::unique_ptr<SparseBitSet>> vsTables;
+        SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+        ASSERT_TRUE(vsTables.empty());
+    }
+}
+
+TEST(CmapCoverageTest, TableSelection_brokenVSTable_bestEffort) {  // One bad record must not discard the valid one.
+    std::vector<uint8_t> cmap12Table = buildCmapFormat12Table(std::vector<uint32_t>({'a', 'a'}));
+    {
+        SCOPED_TRACE("Invalid default UVS offset in variation records");
+        std::vector<uint8_t> vsTable = buildCmapFormat14Table(std::vector<VariationSelectorRecord>({
+            { 0xFE0E, { 'a', 'a' }, { 'b' } },
+            { 0xFE0F, { 'a', 'a' }, { 'b' } },
+        }));
+        writeU32(5000, vsTable.data(), 13 /* defaultUVSOffset offset in the record for 0xFE0E */);
+        CmapBuilder builder(2);
+        builder.appendTable(3, 1, cmap12Table);
+        builder.appendTable(VS_PLATFORM_ID, VS_ENCODING_ID, vsTable);
+        std::vector<uint8_t> cmap = builder.build();
+
+        std::vector<std::unique_ptr<SparseBitSet>> vsTables;
+        SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+
+        const uint16_t vs16Index = getVsIndex(0xFE0F);  // The untouched record must still be readable.
+        ASSERT_LT(vs16Index, vsTables.size());
+        ASSERT_TRUE(vsTables[vs16Index]);
+        EXPECT_TRUE(vsTables[vs16Index]->get('a'));
+        EXPECT_TRUE(vsTables[vs16Index]->get('b'));
+
+        const uint16_t vs15Index = getVsIndex(0xFE0E);  // The corrupted record must produce no entry.
+        EXPECT_FALSE(vsTables[vs15Index]);  // NOTE(review): vs15Index is not bounds-checked, unlike vs16Index.
+    }
+    {
+        SCOPED_TRACE("Invalid non default UVS offset in variation records");
+        std::vector<uint8_t> vsTable = buildCmapFormat14Table(std::vector<VariationSelectorRecord>({
+            { 0xFE0E, { 'a', 'a' }, { 'b' } },
+            { 0xFE0F, { 'a', 'a' }, { 'b' } },
+        }));
+        writeU32(5000, vsTable.data(), 17 /* nonDefaultUVSOffset offset in the first record */);
+        CmapBuilder builder(2);
+        builder.appendTable(3, 1, cmap12Table);
+        builder.appendTable(VS_PLATFORM_ID, VS_ENCODING_ID, vsTable);
+        std::vector<uint8_t> cmap = builder.build();
+
+        std::vector<std::unique_ptr<SparseBitSet>> vsTables;
+        SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+
+        const uint16_t vs16Index = getVsIndex(0xFE0F);  // The untouched record must still be readable.
+        ASSERT_LT(vs16Index, vsTables.size());
+        ASSERT_TRUE(vsTables[vs16Index]);
+        EXPECT_TRUE(vsTables[vs16Index]->get('a'));
+        EXPECT_TRUE(vsTables[vs16Index]->get('b'));
+
+        const uint16_t vs15Index = getVsIndex(0xFE0E);  // The corrupted record must produce no entry.
+        EXPECT_FALSE(vsTables[vs15Index]);  // NOTE(review): vs15Index is not bounds-checked, unlike vs16Index.
+    }
+    {
+        SCOPED_TRACE("Unknown variation selectors.");
+        std::vector<uint8_t> vsTable = buildCmapFormat14Table(std::vector<VariationSelectorRecord>({
+            { 0xFE0F, { 'a', 'a' }, { 'b' } },
+            { 0xEFFFF, { 'a', 'a' }, { 'b' } },
+        }));
+        CmapBuilder builder(2);
+        builder.appendTable(3, 1, cmap12Table);
+        builder.appendTable(VS_PLATFORM_ID, VS_ENCODING_ID, vsTable);
+        std::vector<uint8_t> cmap = builder.build();
+
+        std::vector<std::unique_ptr<SparseBitSet>> vsTables;
+        SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+
+        const uint16_t vs16Index = getVsIndex(0xFE0F);  // Only the 0xFE0F record is checked here.
+        ASSERT_LT(vs16Index, vsTables.size());
+        ASSERT_TRUE(vsTables[vs16Index]);
+        EXPECT_TRUE(vsTables[vs16Index]->get('a'));
+        EXPECT_TRUE(vsTables[vs16Index]->get('b'));
+    }
+}
+
+// Purely cosmetic: makes each (start, end) pair in the range list below easier to read.
+#define RANGE(x, y) x, y
+
+TEST(CmapCoverageTest, TableSelection_defaultUVSPointMissingGlyph) {  // Default UVS entries need a base glyph.
+    std::vector<uint8_t> baseTable = buildCmapFormat12Table(std::vector<uint32_t>(
+            {RANGE('a', 'e'), RANGE('g', 'h'), RANGE('j', 'j'), RANGE('m', 'z')}));  // Skips f, i, k and l.
+    std::vector<uint8_t> vsTable = buildCmapFormat14Table(std::vector<VariationSelectorRecord>({
+        { 0xFE0F, { 'a', 'z' }, { } }
+    }));
+
+    CmapBuilder builder(2);
+    builder.appendTable(3, 1, baseTable);
+    builder.appendTable(VS_PLATFORM_ID, VS_ENCODING_ID, vsTable);
+    std::vector<uint8_t> cmap = builder.build();
+
+    std::vector<std::unique_ptr<SparseBitSet>> vsTables;
+    SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+    const uint16_t vsIndex = getVsIndex(0xFE0F);
+    ASSERT_LT(vsIndex, vsTables.size());
+    ASSERT_TRUE(vsTables[vsIndex]);
+
+    for (char c = 'a'; c <= 'z'; ++c)  {
+        // A default UVS entry maps the variation sequence to the glyph of the base code point.
+        // Thus, if the base code point is not supported, the sequence must be excluded as well.
+        EXPECT_EQ(coverage.get(c), vsTables[vsIndex]->get(c)) << c;
+    }
+}
+
+#undef RANGE
+
+TEST(CmapCoverageTest, TableSelection_vsTableOnly) {  // A cmap holding only a format 14 table still fills vsTables.
+    std::vector<uint8_t> vsTable = buildCmapFormat14Table(std::vector<VariationSelectorRecord>({
+        { 0xFE0F, { }, { 'a' } }
+    }));
+
+    CmapBuilder builder(1);
+    builder.appendTable(VS_PLATFORM_ID, VS_ENCODING_ID, vsTable);  // No base table is appended.
+    std::vector<uint8_t> cmap = builder.build();
+
+    std::vector<std::unique_ptr<SparseBitSet>> vsTables;
+    SparseBitSet coverage = CmapCoverage::getCoverage(cmap.data(), cmap.size(), &vsTables);
+    const uint16_t vsIndex = getVsIndex(0xFE0F);
+    ASSERT_LT(vsIndex, vsTables.size());
+    ASSERT_TRUE(vsTables[vsIndex]);
+    EXPECT_TRUE(vsTables[vsIndex]->get('a'));  // Comes from the non-default part of the record.
+}
+}  // namespace minikin

diff --git a/tests/unittest/EmojiTest.cpp b/tests/unittest/EmojiTest.cpp
new file mode 100644
index 0000000..e7d0f56
--- /dev/null
+++ b/tests/unittest/EmojiTest.cpp

@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <unicode/uchar.h>
+
+#include <minikin/Emoji.h>
+
+namespace minikin {
+
+TEST(EmojiTest, isEmojiTest) {  // spot-checks isEmoji() against hand-picked code points
+    EXPECT_TRUE(isEmoji(0x0023));  // NUMBER SIGN
+    EXPECT_TRUE(isEmoji(0x0035));  // DIGIT FIVE
+    EXPECT_TRUE(isEmoji(0x2640));  // FEMALE SIGN
+    EXPECT_TRUE(isEmoji(0x2642));  // MALE SIGN
+    EXPECT_TRUE(isEmoji(0x2695));  // STAFF OF AESCULAPIUS
+    EXPECT_TRUE(isEmoji(0x1F0CF));  // PLAYING CARD BLACK JOKER
+    EXPECT_TRUE(isEmoji(0x1F1E9));  // REGIONAL INDICATOR SYMBOL LETTER D
+    EXPECT_TRUE(isEmoji(0x1F6F7));  // SLED
+    EXPECT_TRUE(isEmoji(0x1F9E6));  // SOCKS
+
+    EXPECT_FALSE(isEmoji(0x0000));  // <control>
+    EXPECT_FALSE(isEmoji(0x0061));  // LATIN SMALL LETTER A
+    EXPECT_FALSE(isEmoji(0x1F93B));  // MODERN PENTATHLON
+    EXPECT_FALSE(isEmoji(0x1F946));  // RIFLE
+    EXPECT_FALSE(isEmoji(0x29E3D));  // A Han character.
+}
+
+TEST(EmojiTest, isEmojiModifierTest) {  // U+1F3FB..U+1F3FF accepted, neighbours rejected
+    EXPECT_TRUE(isEmojiModifier(0x1F3FB));  // EMOJI MODIFIER FITZPATRICK TYPE-1-2
+    EXPECT_TRUE(isEmojiModifier(0x1F3FC));  // EMOJI MODIFIER FITZPATRICK TYPE-3
+    EXPECT_TRUE(isEmojiModifier(0x1F3FD));  // EMOJI MODIFIER FITZPATRICK TYPE-4
+    EXPECT_TRUE(isEmojiModifier(0x1F3FE));  // EMOJI MODIFIER FITZPATRICK TYPE-5
+    EXPECT_TRUE(isEmojiModifier(0x1F3FF));  // EMOJI MODIFIER FITZPATRICK TYPE-6
+
+    EXPECT_FALSE(isEmojiModifier(0x0000));  // <control>
+    EXPECT_FALSE(isEmojiModifier(0x1F3FA));  // AMPHORA (code point just before U+1F3FB)
+    EXPECT_FALSE(isEmojiModifier(0x1F400));  // RAT (code point just after U+1F3FF)
+    EXPECT_FALSE(isEmojiModifier(0x29E3D));  // A Han character.
+}
+
+TEST(EmojiTest, isEmojiBaseTest) {  // spot-checks isEmojiBase() against hand-picked code points
+    EXPECT_TRUE(isEmojiBase(0x261D));  // WHITE UP POINTING INDEX
+    EXPECT_TRUE(isEmojiBase(0x270D));  // WRITING HAND
+    EXPECT_TRUE(isEmojiBase(0x1F385));  // FATHER CHRISTMAS
+    EXPECT_TRUE(isEmojiBase(0x1F3C2));  // SNOWBOARDER
+    EXPECT_TRUE(isEmojiBase(0x1F3C7));  // HORSE RACING
+    EXPECT_TRUE(isEmojiBase(0x1F3CC));  // GOLFER
+    EXPECT_TRUE(isEmojiBase(0x1F574));  // MAN IN BUSINESS SUIT LEVITATING
+    EXPECT_TRUE(isEmojiBase(0x1F6CC));  // SLEEPING ACCOMMODATION
+    EXPECT_TRUE(isEmojiBase(0x1F91D));  // HANDSHAKE (removed from Emoji 4.0, but we need it)
+    EXPECT_TRUE(isEmojiBase(0x1F91F));  // I LOVE YOU HAND SIGN
+    EXPECT_TRUE(isEmojiBase(0x1F931));  // BREAST-FEEDING
+    EXPECT_TRUE(isEmojiBase(0x1F932));  // PALMS UP TOGETHER
+    EXPECT_TRUE(isEmojiBase(0x1F93C));  // WRESTLERS (removed from Emoji 4.0, but we need it)
+    EXPECT_TRUE(isEmojiBase(0x1F9D1));  // ADULT
+    EXPECT_TRUE(isEmojiBase(0x1F9DD));  // ELF
+
+    EXPECT_FALSE(isEmojiBase(0x0000));  // <control>
+    EXPECT_FALSE(isEmojiBase(0x261C));  // WHITE LEFT POINTING INDEX (one before U+261D)
+    EXPECT_FALSE(isEmojiBase(0x1F384));  // CHRISTMAS TREE (one before U+1F385)
+    EXPECT_FALSE(isEmojiBase(0x1F9DE));  // GENIE (one after U+1F9DD)
+    EXPECT_FALSE(isEmojiBase(0x29E3D));  // A Han character.
+}
+
+TEST(EmojiTest, emojiBidiOverrideTest) {  // expected values are ICU UCharDirection constants
+    EXPECT_EQ(U_RIGHT_TO_LEFT, emojiBidiOverride(nullptr, 0x05D0));  // HEBREW LETTER ALEF
+    EXPECT_EQ(U_LEFT_TO_RIGHT,
+            emojiBidiOverride(nullptr, 0x1F170));  // NEGATIVE SQUARED LATIN CAPITAL LETTER A
+    EXPECT_EQ(U_OTHER_NEUTRAL, emojiBidiOverride(nullptr, 0x1F6F7));  // SLED
+    EXPECT_EQ(U_OTHER_NEUTRAL, emojiBidiOverride(nullptr, 0x1F9E6));  // SOCKS
+}
+
+}  // namespace minikin

diff --git a/tests/FontCollectionItemizeTest.cpp b/tests/unittest/FontCollectionItemizeTest.cpp
similarity index 79%
rename from tests/FontCollectionItemizeTest.cpp
rename to tests/unittest/FontCollectionItemizeTest.cpp
index 468b4a2..78bfa3b 100644
--- a/tests/FontCollectionItemizeTest.cpp
+++ b/tests/unittest/FontCollectionItemizeTest.cpp

@@ -16,6 +16,8 @@
 
 #include <gtest/gtest.h>
 
+#include <memory>
+
 #include "FontLanguageListCache.h"
 #include "FontLanguage.h"
 #include "FontTestUtils.h"
@@ -25,16 +27,7 @@
 #include "UnicodeUtils.h"
 #include "minikin/FontFamily.h"
 
-using android::AutoMutex;
-using android::FontCollection;
-using android::FontFamily;
-using android::FontLanguage;
-using android::FontLanguages;
-using android::FontLanguageListCache;
-using android::FontStyle;
-using android::MinikinAutoUnref;
-using android::MinikinFont;
-using android::gMinikinLock;
+namespace minikin {
 
 const char kItemizeFontXml[] = kTestFontDir "itemize.xml";
 const char kEmojiFont[] = kTestFontDir "Emoji.ttf";
@@ -53,10 +46,13 @@
 const char kTextEmojiFont[] = kTestFontDir "TextEmojiFont.ttf";
 const char kMixedEmojiFont[] = kTestFontDir "ColorTextMixedEmojiFont.ttf";
 
+const char kHasCmapFormat14Font[] =  kTestFontDir "NoCmapFormat14.ttf";
+const char kNoCmapFormat14Font[] =  kTestFontDir "VariationSelectorTest-Regular.ttf";
+
 typedef ICUTestBase FontCollectionItemizeTest;
 
 // Utility function for calling itemize function.
-void itemize(FontCollection* collection, const char* str, FontStyle style,
+void itemize(const std::shared_ptr<FontCollection>& collection, const char* str, FontStyle style,
         std::vector<FontCollection::Run>* result) {
     const size_t BUF_SIZE = 256;
     uint16_t buf[BUF_SIZE];
@@ -64,7 +60,7 @@
 
     result->clear();
     ParseUnicode(buf, BUF_SIZE, str, &len, NULL);
-    AutoMutex _l(gMinikinLock);
+    android::AutoMutex _l(gMinikinLock);
     collection->itemize(buf, len, style, result);
 }
 
@@ -76,12 +72,12 @@
 
 // Utility function to obtain FontLanguages from string.
 const FontLanguages& registerAndGetFontLanguages(const std::string& lang_string) {
-    AutoMutex _l(gMinikinLock);
+    android::AutoMutex _l(gMinikinLock);
     return FontLanguageListCache::getById(FontLanguageListCache::getId(lang_string));
 }
 
 TEST_F(FontCollectionItemizeTest, itemize_latin) {
-    MinikinAutoUnref<FontCollection> collection(getFontCollection(kTestFontDir, kItemizeFontXml));
+    std::shared_ptr<FontCollection> collection(getFontCollection(kTestFontDir, kItemizeFontXml));
     std::vector<FontCollection::Run> runs;
 
     const FontStyle kRegularStyle = FontStyle();
@@ -89,7 +85,7 @@
     const FontStyle kBoldStyle = FontStyle(7, false);
     const FontStyle kBoldItalicStyle = FontStyle(7, true);
 
-    itemize(collection.get(), "'a' 'b' 'c' 'd' 'e'", kRegularStyle, &runs);
+    itemize(collection, "'a' 'b' 'c' 'd' 'e'", kRegularStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(5, runs[0].end);
@@ -97,7 +93,7 @@
     EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold());
     EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic());
 
-    itemize(collection.get(), "'a' 'b' 'c' 'd' 'e'", kItalicStyle, &runs);
+    itemize(collection, "'a' 'b' 'c' 'd' 'e'", kItalicStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(5, runs[0].end);
@@ -105,7 +101,7 @@
     EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold());
     EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic());
 
-    itemize(collection.get(), "'a' 'b' 'c' 'd' 'e'", kBoldStyle, &runs);
+    itemize(collection, "'a' 'b' 'c' 'd' 'e'", kBoldStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(5, runs[0].end);
@@ -113,7 +109,7 @@
     EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold());
     EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic());
 
-    itemize(collection.get(), "'a' 'b' 'c' 'd' 'e'", kBoldItalicStyle, &runs);
+    itemize(collection, "'a' 'b' 'c' 'd' 'e'", kBoldItalicStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(5, runs[0].end);
@@ -123,7 +119,7 @@
 
     // Continue if the specific characters (e.g. hyphen, comma, etc.) is
     // followed.
-    itemize(collection.get(), "'a' ',' '-' 'd' '!'", kRegularStyle, &runs);
+    itemize(collection, "'a' ',' '-' 'd' '!'", kRegularStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(5, runs[0].end);
@@ -131,7 +127,7 @@
     EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold());
     EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic());
 
-    itemize(collection.get(), "'a' ',' '-' 'd' '!'", kRegularStyle, &runs);
+    itemize(collection, "'a' ',' '-' 'd' '!'", kRegularStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(5, runs[0].end);
@@ -141,7 +137,7 @@
 
     // U+0301(COMBINING ACUTE ACCENT) must be in the same run with preceding
     // chars if the font supports it.
-    itemize(collection.get(), "'a' U+0301", kRegularStyle, &runs);
+    itemize(collection, "'a' U+0301", kRegularStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
@@ -151,10 +147,10 @@
 }
 
 TEST_F(FontCollectionItemizeTest, itemize_emoji) {
-    MinikinAutoUnref<FontCollection> collection(getFontCollection(kTestFontDir, kItemizeFontXml));
+    std::shared_ptr<FontCollection> collection(getFontCollection(kTestFontDir, kItemizeFontXml));
     std::vector<FontCollection::Run> runs;
 
-    itemize(collection.get(), "U+1F469 U+1F467", FontStyle(), &runs);
+    itemize(collection, "U+1F469 U+1F467", FontStyle(), &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(4, runs[0].end);
@@ -164,7 +160,7 @@
 
     // U+20E3(COMBINING ENCLOSING KEYCAP) must be in the same run with preceding
     // character if the font supports.
-    itemize(collection.get(), "'0' U+20E3", FontStyle(), &runs);
+    itemize(collection, "'0' U+20E3", FontStyle(), &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
@@ -172,7 +168,7 @@
     EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold());
     EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic());
 
-    itemize(collection.get(), "U+1F470 U+20E3", FontStyle(), &runs);
+    itemize(collection, "U+1F470 U+20E3", FontStyle(), &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(3, runs[0].end);
@@ -180,7 +176,7 @@
     EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold());
     EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic());
 
-    itemize(collection.get(), "U+242EE U+1F470 U+20E3", FontStyle(), &runs);
+    itemize(collection, "U+242EE U+1F470 U+20E3", FontStyle(), &runs);
     ASSERT_EQ(2U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
@@ -196,7 +192,7 @@
 
     // Currently there is no fonts which has a glyph for 'a' + U+20E3, so they
     // are splitted into two.
-    itemize(collection.get(), "'a' U+20E3", FontStyle(), &runs);
+    itemize(collection, "'a' U+20E3", FontStyle(), &runs);
     ASSERT_EQ(2U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(1, runs[0].end);
@@ -212,7 +208,7 @@
 }
 
 TEST_F(FontCollectionItemizeTest, itemize_non_latin) {
-    MinikinAutoUnref<FontCollection> collection(getFontCollection(kTestFontDir, kItemizeFontXml));
+    std::shared_ptr<FontCollection> collection(getFontCollection(kTestFontDir, kItemizeFontXml));
     std::vector<FontCollection::Run> runs;
 
     FontStyle kJAStyle = FontStyle(FontStyle::registerLanguageList("ja_JP"));
@@ -220,7 +216,7 @@
     FontStyle kZH_HansStyle = FontStyle(FontStyle::registerLanguageList("zh_Hans"));
 
     // All Japanese Hiragana characters.
-    itemize(collection.get(), "U+3042 U+3044 U+3046 U+3048 U+304A", kUSStyle, &runs);
+    itemize(collection, "U+3042 U+3044 U+3046 U+3048 U+304A", kUSStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(5, runs[0].end);
@@ -229,7 +225,7 @@
     EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic());
 
     // All Korean Hangul characters.
-    itemize(collection.get(), "U+B300 U+D55C U+BBFC U+AD6D", kUSStyle, &runs);
+    itemize(collection, "U+B300 U+D55C U+BBFC U+AD6D", kUSStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(4, runs[0].end);
@@ -239,7 +235,7 @@
 
     // All Han characters ja, zh-Hans font having.
     // Japanese font should be selected if the specified language is Japanese.
-    itemize(collection.get(), "U+81ED U+82B1 U+5FCD", kJAStyle, &runs);
+    itemize(collection, "U+81ED U+82B1 U+5FCD", kJAStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(3, runs[0].end);
@@ -249,7 +245,7 @@
 
     // Simplified Chinese font should be selected if the specified language is Simplified
     // Chinese.
-    itemize(collection.get(), "U+81ED U+82B1 U+5FCD", kZH_HansStyle, &runs);
+    itemize(collection, "U+81ED U+82B1 U+5FCD", kZH_HansStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(3, runs[0].end);
@@ -259,7 +255,7 @@
 
     // Fallbacks to other fonts if there is no glyph in the specified language's
     // font. There is no character U+4F60 in Japanese.
-    itemize(collection.get(), "U+81ED U+4F60 U+5FCD", kJAStyle, &runs);
+    itemize(collection, "U+81ED U+4F60 U+5FCD", kJAStyle, &runs);
     ASSERT_EQ(3U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(1, runs[0].end);
@@ -280,7 +276,7 @@
     EXPECT_FALSE(runs[2].fakedFont.fakery.isFakeItalic());
 
     // Tone mark.
-    itemize(collection.get(), "U+4444 U+302D", FontStyle(), &runs);
+    itemize(collection, "U+4444 U+302D", FontStyle(), &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
@@ -291,7 +287,7 @@
     // Both zh-Hant and ja fonts support U+242EE, but zh-Hans doesn't.
     // Here, ja and zh-Hant font should have the same score but ja should be selected since it is
     // listed before zh-Hant.
-    itemize(collection.get(), "U+242EE", kZH_HansStyle, &runs);
+    itemize(collection, "U+242EE", kZH_HansStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
@@ -301,12 +297,12 @@
 }
 
 TEST_F(FontCollectionItemizeTest, itemize_mixed) {
-    MinikinAutoUnref<FontCollection> collection(getFontCollection(kTestFontDir, kItemizeFontXml));
+    std::shared_ptr<FontCollection> collection(getFontCollection(kTestFontDir, kItemizeFontXml));
     std::vector<FontCollection::Run> runs;
 
     FontStyle kUSStyle = FontStyle(FontStyle::registerLanguageList("en_US"));
 
-    itemize(collection.get(), "'a' U+4F60 'b' U+4F60 'c'", kUSStyle, &runs);
+    itemize(collection, "'a' U+4F60 'b' U+4F60 'c'", kUSStyle, &runs);
     ASSERT_EQ(5U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(1, runs[0].end);
@@ -340,7 +336,7 @@
 }
 
 TEST_F(FontCollectionItemizeTest, itemize_variationSelector) {
-    MinikinAutoUnref<FontCollection> collection(getFontCollection(kTestFontDir, kItemizeFontXml));
+    std::shared_ptr<FontCollection> collection(getFontCollection(kTestFontDir, kItemizeFontXml));
     std::vector<FontCollection::Run> runs;
 
     // A glyph for U+4FAE is provided by both Japanese font and Simplified
@@ -352,19 +348,19 @@
 
     // U+4FAE is available in both zh_Hans and ja font, but U+4FAE,U+FE00 is
     // only available in ja font.
-    itemize(collection.get(), "U+4FAE", kZH_HansStyle, &runs);
+    itemize(collection, "U+4FAE", kZH_HansStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(1, runs[0].end);
     EXPECT_EQ(kZH_HansFont, getFontPath(runs[0]));
 
-    itemize(collection.get(), "U+4FAE U+FE00", kZH_HansStyle, &runs);
+    itemize(collection, "U+4FAE U+FE00", kZH_HansStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
     EXPECT_EQ(kJAFont, getFontPath(runs[0]));
 
-    itemize(collection.get(), "U+4FAE U+4FAE U+FE00", kZH_HansStyle, &runs);
+    itemize(collection, "U+4FAE U+4FAE U+FE00", kZH_HansStyle, &runs);
     ASSERT_EQ(2U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(1, runs[0].end);
@@ -373,7 +369,7 @@
     EXPECT_EQ(3, runs[1].end);
     EXPECT_EQ(kJAFont, getFontPath(runs[1]));
 
-    itemize(collection.get(), "U+4FAE U+4FAE U+FE00 U+4FAE", kZH_HansStyle, &runs);
+    itemize(collection, "U+4FAE U+4FAE U+FE00 U+4FAE", kZH_HansStyle, &runs);
     ASSERT_EQ(3U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(1, runs[0].end);
@@ -386,14 +382,14 @@
     EXPECT_EQ(kZH_HansFont, getFontPath(runs[2]));
 
     // Validation selector after validation selector.
-    itemize(collection.get(), "U+4FAE U+FE00 U+FE00", kZH_HansStyle, &runs);
+    itemize(collection, "U+4FAE U+FE00 U+FE00", kZH_HansStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(3, runs[0].end);
     EXPECT_EQ(kJAFont, getFontPath(runs[1]));
 
     // No font supports U+242EE U+FE0E.
-    itemize(collection.get(), "U+4FAE U+FE0E", kZH_HansStyle, &runs);
+    itemize(collection, "U+4FAE U+FE0E", kZH_HansStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
@@ -402,19 +398,19 @@
     // Surrogate pairs handling.
     // U+242EE is available in ja font and zh_Hant font.
     // U+242EE U+FE00 is available only in ja font.
-    itemize(collection.get(), "U+242EE", kZH_HantStyle, &runs);
+    itemize(collection, "U+242EE", kZH_HantStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
     EXPECT_EQ(kZH_HantFont, getFontPath(runs[0]));
 
-    itemize(collection.get(), "U+242EE U+FE00", kZH_HantStyle, &runs);
+    itemize(collection, "U+242EE U+FE00", kZH_HantStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(3, runs[0].end);
     EXPECT_EQ(kJAFont, getFontPath(runs[0]));
 
-    itemize(collection.get(), "U+242EE U+242EE U+FE00", kZH_HantStyle, &runs);
+    itemize(collection, "U+242EE U+242EE U+FE00", kZH_HantStyle, &runs);
     ASSERT_EQ(2U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
@@ -423,7 +419,7 @@
     EXPECT_EQ(5, runs[1].end);
     EXPECT_EQ(kJAFont, getFontPath(runs[1]));
 
-    itemize(collection.get(), "U+242EE U+242EE U+FE00 U+242EE", kZH_HantStyle, &runs);
+    itemize(collection, "U+242EE U+242EE U+FE00 U+242EE", kZH_HantStyle, &runs);
     ASSERT_EQ(3U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
@@ -436,27 +432,27 @@
     EXPECT_EQ(kZH_HantFont, getFontPath(runs[2]));
 
     // Validation selector after validation selector.
-    itemize(collection.get(), "U+242EE U+FE00 U+FE00", kZH_HansStyle, &runs);
+    itemize(collection, "U+242EE U+FE00 U+FE00", kZH_HansStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(4, runs[0].end);
     EXPECT_EQ(kJAFont, getFontPath(runs[0]));
 
     // No font supports U+242EE U+FE0E
-    itemize(collection.get(), "U+242EE U+FE0E", kZH_HantStyle, &runs);
+    itemize(collection, "U+242EE U+FE0E", kZH_HantStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(3, runs[0].end);
     EXPECT_EQ(kZH_HantFont, getFontPath(runs[0]));
 
     // Isolated variation selector supplement.
-    itemize(collection.get(), "U+FE00", FontStyle(), &runs);
+    itemize(collection, "U+FE00", FontStyle(), &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(1, runs[0].end);
     EXPECT_TRUE(runs[0].fakedFont.font == nullptr || kLatinFont == getFontPath(runs[0]));
 
-    itemize(collection.get(), "U+FE00", kZH_HantStyle, &runs);
+    itemize(collection, "U+FE00", kZH_HantStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(1, runs[0].end);
@@ -464,14 +460,14 @@
 
     // First font family (Regular.ttf) supports U+203C but doesn't support U+203C U+FE0F.
     // Emoji.ttf font supports U+203C U+FE0F.  Emoji.ttf should be selected.
-    itemize(collection.get(), "U+203C U+FE0F", kZH_HantStyle, &runs);
+    itemize(collection, "U+203C U+FE0F", kZH_HantStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
     EXPECT_EQ(kEmojiFont, getFontPath(runs[0]));
 
     // First font family (Regular.ttf) supports U+203C U+FE0E.
-    itemize(collection.get(), "U+203C U+FE0E", kZH_HantStyle, &runs);
+    itemize(collection, "U+203C U+FE0E", kZH_HantStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
@@ -479,7 +475,7 @@
 }
 
 TEST_F(FontCollectionItemizeTest, itemize_variationSelectorSupplement) {
-    MinikinAutoUnref<FontCollection> collection(getFontCollection(kTestFontDir, kItemizeFontXml));
+    std::shared_ptr<FontCollection> collection(getFontCollection(kTestFontDir, kItemizeFontXml));
     std::vector<FontCollection::Run> runs;
 
     // A glyph for U+845B is provided by both Japanese font and Simplified
@@ -491,19 +487,19 @@
 
     // U+845B is available in both zh_Hans and ja font, but U+845B,U+E0100 is
     // only available in ja font.
-    itemize(collection.get(), "U+845B", kZH_HansStyle, &runs);
+    itemize(collection, "U+845B", kZH_HansStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(1, runs[0].end);
     EXPECT_EQ(kZH_HansFont, getFontPath(runs[0]));
 
-    itemize(collection.get(), "U+845B U+E0100", kZH_HansStyle, &runs);
+    itemize(collection, "U+845B U+E0100", kZH_HansStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(3, runs[0].end);
     EXPECT_EQ(kJAFont, getFontPath(runs[0]));
 
-    itemize(collection.get(), "U+845B U+845B U+E0100", kZH_HansStyle, &runs);
+    itemize(collection, "U+845B U+845B U+E0100", kZH_HansStyle, &runs);
     ASSERT_EQ(2U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(1, runs[0].end);
@@ -512,7 +508,7 @@
     EXPECT_EQ(4, runs[1].end);
     EXPECT_EQ(kJAFont, getFontPath(runs[1]));
 
-    itemize(collection.get(), "U+845B U+845B U+E0100 U+845B", kZH_HansStyle, &runs);
+    itemize(collection, "U+845B U+845B U+E0100 U+845B", kZH_HansStyle, &runs);
     ASSERT_EQ(3U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(1, runs[0].end);
@@ -525,14 +521,14 @@
     EXPECT_EQ(kZH_HansFont, getFontPath(runs[2]));
 
     // Validation selector after validation selector.
-    itemize(collection.get(), "U+845B U+E0100 U+E0100", kZH_HansStyle, &runs);
+    itemize(collection, "U+845B U+E0100 U+E0100", kZH_HansStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(5, runs[0].end);
     EXPECT_EQ(kJAFont, getFontPath(runs[0]));
 
     // No font supports U+845B U+E01E0.
-    itemize(collection.get(), "U+845B U+E01E0", kZH_HansStyle, &runs);
+    itemize(collection, "U+845B U+E01E0", kZH_HansStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(3, runs[0].end);
@@ -542,19 +538,19 @@
     // Surrogate pairs handling.
     // U+242EE is available in ja font and zh_Hant font.
     // U+242EE U+E0100 is available only in ja font.
-    itemize(collection.get(), "U+242EE", kZH_HantStyle, &runs);
+    itemize(collection, "U+242EE", kZH_HantStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
     EXPECT_EQ(kZH_HantFont, getFontPath(runs[0]));
 
-    itemize(collection.get(), "U+242EE U+E0101", kZH_HantStyle, &runs);
+    itemize(collection, "U+242EE U+E0101", kZH_HantStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(4, runs[0].end);
     EXPECT_EQ(kJAFont, getFontPath(runs[0]));
 
-    itemize(collection.get(), "U+242EE U+242EE U+E0101", kZH_HantStyle, &runs);
+    itemize(collection, "U+242EE U+242EE U+E0101", kZH_HantStyle, &runs);
     ASSERT_EQ(2U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
@@ -563,7 +559,7 @@
     EXPECT_EQ(6, runs[1].end);
     EXPECT_EQ(kJAFont, getFontPath(runs[1]));
 
-    itemize(collection.get(), "U+242EE U+242EE U+E0101 U+242EE", kZH_HantStyle, &runs);
+    itemize(collection, "U+242EE U+242EE U+E0101 U+242EE", kZH_HantStyle, &runs);
     ASSERT_EQ(3U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
@@ -576,27 +572,27 @@
     EXPECT_EQ(kZH_HantFont, getFontPath(runs[2]));
 
     // Validation selector after validation selector.
-    itemize(collection.get(), "U+242EE U+E0100 U+E0100", kZH_HantStyle, &runs);
+    itemize(collection, "U+242EE U+E0100 U+E0100", kZH_HantStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(6, runs[0].end);
     EXPECT_EQ(kJAFont, getFontPath(runs[0]));
 
     // No font supports U+242EE U+E01E0.
-    itemize(collection.get(), "U+242EE U+E01E0", kZH_HantStyle, &runs);
+    itemize(collection, "U+242EE U+E01E0", kZH_HantStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(4, runs[0].end);
     EXPECT_EQ(kZH_HantFont, getFontPath(runs[0]));
 
     // Isolated variation selector supplement.
-    itemize(collection.get(), "U+E0100", FontStyle(), &runs);
+    itemize(collection, "U+E0100", FontStyle(), &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
     EXPECT_TRUE(runs[0].fakedFont.font == nullptr || kLatinFont == getFontPath(runs[0]));
 
-    itemize(collection.get(), "U+E0100", kZH_HantStyle, &runs);
+    itemize(collection, "U+E0100", kZH_HantStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
@@ -604,31 +600,31 @@
 }
 
 TEST_F(FontCollectionItemizeTest, itemize_no_crash) {
-    MinikinAutoUnref<FontCollection> collection(getFontCollection(kTestFontDir, kItemizeFontXml));
+    std::shared_ptr<FontCollection> collection(getFontCollection(kTestFontDir, kItemizeFontXml));
     std::vector<FontCollection::Run> runs;
 
     // Broken Surrogate pairs. Check only not crashing.
-    itemize(collection.get(), "'a' U+D83D 'a'", FontStyle(), &runs);
-    itemize(collection.get(), "'a' U+DC69 'a'", FontStyle(), &runs);
-    itemize(collection.get(), "'a' U+D83D U+D83D 'a'", FontStyle(), &runs);
-    itemize(collection.get(), "'a' U+DC69 U+DC69 'a'", FontStyle(), &runs);
+    itemize(collection, "'a' U+D83D 'a'", FontStyle(), &runs);
+    itemize(collection, "'a' U+DC69 'a'", FontStyle(), &runs);
+    itemize(collection, "'a' U+D83D U+D83D 'a'", FontStyle(), &runs);
+    itemize(collection, "'a' U+DC69 U+DC69 'a'", FontStyle(), &runs);
 
     // Isolated variation selector. Check only not crashing.
-    itemize(collection.get(), "U+FE00 U+FE00", FontStyle(), &runs);
-    itemize(collection.get(), "U+E0100 U+E0100", FontStyle(), &runs);
-    itemize(collection.get(), "U+FE00 U+E0100", FontStyle(), &runs);
-    itemize(collection.get(), "U+E0100 U+FE00", FontStyle(), &runs);
+    itemize(collection, "U+FE00 U+FE00", FontStyle(), &runs);
+    itemize(collection, "U+E0100 U+E0100", FontStyle(), &runs);
+    itemize(collection, "U+FE00 U+E0100", FontStyle(), &runs);
+    itemize(collection, "U+E0100 U+FE00", FontStyle(), &runs);
 
     // Tone mark only. Check only not crashing.
-    itemize(collection.get(), "U+302D", FontStyle(), &runs);
-    itemize(collection.get(), "U+302D U+302D", FontStyle(), &runs);
+    itemize(collection, "U+302D", FontStyle(), &runs);
+    itemize(collection, "U+302D U+302D", FontStyle(), &runs);
 
     // Tone mark and variation selector mixed. Check only not crashing.
-    itemize(collection.get(), "U+FE00 U+302D U+E0100", FontStyle(), &runs);
+    itemize(collection, "U+FE00 U+302D U+E0100", FontStyle(), &runs);
 }
 
 TEST_F(FontCollectionItemizeTest, itemize_fakery) {
-    MinikinAutoUnref<FontCollection> collection(getFontCollection(kTestFontDir, kItemizeFontXml));
+    std::shared_ptr<FontCollection> collection(getFontCollection(kTestFontDir, kItemizeFontXml));
     std::vector<FontCollection::Run> runs;
 
     FontStyle kJABoldStyle = FontStyle(FontStyle::registerLanguageList("ja_JP"), 0, 7, false);
@@ -640,7 +636,7 @@
     // the differences between desired and actual font style.
 
     // All Japanese Hiragana characters.
-    itemize(collection.get(), "U+3042 U+3044 U+3046 U+3048 U+304A", kJABoldStyle, &runs);
+    itemize(collection, "U+3042 U+3044 U+3046 U+3048 U+304A", kJABoldStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(5, runs[0].end);
@@ -649,7 +645,7 @@
     EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic());
 
     // All Japanese Hiragana characters.
-    itemize(collection.get(), "U+3042 U+3044 U+3046 U+3048 U+304A", kJAItalicStyle, &runs);
+    itemize(collection, "U+3042 U+3044 U+3046 U+3048 U+304A", kJAItalicStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(5, runs[0].end);
@@ -658,7 +654,7 @@
     EXPECT_TRUE(runs[0].fakedFont.fakery.isFakeItalic());
 
     // All Japanese Hiragana characters.
-    itemize(collection.get(), "U+3042 U+3044 U+3046 U+3048 U+304A", kJABoldItalicStyle, &runs);
+    itemize(collection, "U+3042 U+3044 U+3046 U+3048 U+304A", kJABoldItalicStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(5, runs[0].end);
@@ -671,29 +667,28 @@
     // kVSTestFont supports U+717D U+FE02 but doesn't support U+717D.
     // kVSTestFont should be selected for U+717D U+FE02 even if it does not support the base code
     // point.
-    const std::string kVSTestFont = kTestFontDir "VarioationSelectorTest-Regular.ttf";
+    const std::string kVSTestFont = kTestFontDir "VariationSelectorTest-Regular.ttf";
 
-    std::vector<android::FontFamily*> families;
-    FontFamily* family1 = new FontFamily(android::VARIANT_DEFAULT);
-    family1->addFont(new MinikinFontForTest(kLatinFont));
+    std::vector<std::shared_ptr<FontFamily>> families;
+    std::shared_ptr<MinikinFont> font(new MinikinFontForTest(kLatinFont));
+    std::shared_ptr<FontFamily> family1(new FontFamily(VARIANT_DEFAULT,
+            std::vector<Font>{ Font(font, FontStyle()) }));
     families.push_back(family1);
 
-    FontFamily* family2 = new FontFamily(android::VARIANT_DEFAULT);
-    family2->addFont(new MinikinFontForTest(kVSTestFont));
+    std::shared_ptr<MinikinFont> font2(new MinikinFontForTest(kVSTestFont));
+    std::shared_ptr<FontFamily> family2(new FontFamily(VARIANT_DEFAULT,
+            std::vector<Font>{ Font(font2, FontStyle()) }));
     families.push_back(family2);
 
-    FontCollection collection(families);
+    std::shared_ptr<FontCollection> collection(new FontCollection(families));
 
     std::vector<FontCollection::Run> runs;
 
-    itemize(&collection, "U+717D U+FE02", FontStyle(), &runs);
+    itemize(collection, "U+717D U+FE02", FontStyle(), &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
     EXPECT_EQ(kVSTestFont, getFontPath(runs[0]));
-
-    family1->Unref();
-    family2->Unref();
 }
 
 TEST_F(FontCollectionItemizeTest, itemize_LanguageScore) {
@@ -784,6 +779,20 @@
         // Language match with unified subscript bits.
         { "zh-Hanb", { "zh-Hant", "zh-Bopo", "ja-Hant,ja-Bopo", "zh-Hant,zh-Bopo"}, 3 },
         { "zh-Hanb", { "zh-Hant", "zh-Bopo", "ja-Hant,zh-Bopo", "zh-Hant,zh-Bopo"}, 3 },
+
+        // Two elements subtag matching: language and subtag or language and script.
+        { "ja-Kana-u-em-emoji", { "zh-Hant", "ja-Kana"}, 1 },
+        { "ja-Kana-u-em-emoji", { "zh-Hant", "ja-Kana", "ja-Zsye"}, 2 },
+        { "ja-Zsym-u-em-emoji", { "ja-Kana", "ja-Zsym", "ja-Zsye"}, 2 },
+
+        // One element subtag matching: subtag only or script only.
+        { "en-Latn-u-em-emoji", { "ja-Latn", "ja-Zsye"}, 1 },
+        { "en-Zsym-u-em-emoji", { "ja-Zsym", "ja-Zsye"}, 1 },
+        { "en-Zsye-u-em-text", { "ja-Zsym", "ja-Zsye"}, 0 },
+
+        // Multiple languages list with subtags.
+        { "en-Latn,ja-Jpan-u-em-text", { "en-Latn", "en-Zsye", "en-Zsym"}, 0 },
+        { "en-Latn,en-Zsye,ja-Jpan-u-em-text", { "zh", "en-Zsye", "en-Zsym"}, 1 },
     };
 
     for (auto testCase : testCases) {
@@ -798,13 +807,14 @@
         SCOPED_TRACE("Test of user preferred languages: \"" + testCase.userPreferredLanguages +
                      "\" with font languages: " + fontLanguagesStr);
 
-        std::vector<FontFamily*> families;
+        std::vector<std::shared_ptr<FontFamily>> families;
 
         // Prepare first font which doesn't supports U+9AA8
-        FontFamily* firstFamily = new FontFamily(
-                FontStyle::registerLanguageList("und"), 0 /* variant */);
-        MinikinFont* firstFamilyMinikinFont = new MinikinFontForTest(kNoGlyphFont);
-        firstFamily->addFont(firstFamilyMinikinFont);
+        std::shared_ptr<MinikinFont> firstFamilyMinikinFont(
+                new MinikinFontForTest(kNoGlyphFont));
+        std::shared_ptr<FontFamily> firstFamily(new FontFamily(
+                FontStyle::registerLanguageList("und"), 0 /* variant */,
+                std::vector<Font>({ Font(firstFamilyMinikinFont, FontStyle()) })));
         families.push_back(firstFamily);
 
         // Prepare font families
@@ -813,29 +823,25 @@
         std::unordered_map<MinikinFont*, int> fontLangIdxMap;
 
         for (size_t i = 0; i < testCase.fontLanguages.size(); ++i) {
-            FontFamily* family = new FontFamily(
-                    FontStyle::registerLanguageList(testCase.fontLanguages[i]), 0 /* variant */);
-            MinikinFont* minikin_font = new MinikinFontForTest(kJAFont);
-            family->addFont(minikin_font);
+            std::shared_ptr<MinikinFont> minikin_font(new MinikinFontForTest(kJAFont));
+            std::shared_ptr<FontFamily> family(new FontFamily(
+                    FontStyle::registerLanguageList(testCase.fontLanguages[i]), 0 /* variant */,
+                    std::vector<Font>({ Font(minikin_font, FontStyle()) })));
             families.push_back(family);
-            fontLangIdxMap.insert(std::make_pair(minikin_font, i));
+            fontLangIdxMap.insert(std::make_pair(minikin_font.get(), i));
         }
-        FontCollection collection(families);
-        for (auto family : families) {
-            family->Unref();
-        }
-
+        std::shared_ptr<FontCollection> collection(new FontCollection(families));
         // Do itemize
         const FontStyle style = FontStyle(
                 FontStyle::registerLanguageList(testCase.userPreferredLanguages));
         std::vector<FontCollection::Run> runs;
-        itemize(&collection, "U+9AA8", style, &runs);
+        itemize(collection, "U+9AA8", style, &runs);
         ASSERT_EQ(1U, runs.size());
         ASSERT_NE(nullptr, runs[0].fakedFont.font);
 
         // First family doesn't support U+9AA8 and others support it, so the first font should not
         // be selected.
-        EXPECT_NE(firstFamilyMinikinFont, runs[0].fakedFont.font);
+        EXPECT_NE(firstFamilyMinikinFont.get(), runs[0].fakedFont.font);
 
         // Lookup used font family by MinikinFont*.
         const int usedLangIndex = fontLangIdxMap[runs[0].fakedFont.font];
@@ -1135,7 +1141,7 @@
         { "U+1F469", "zh-Hant,ja-Jpan,zh-Hans", kEmojiFont },
     };
 
-    MinikinAutoUnref<FontCollection> collection(getFontCollection(kTestFontDir, kItemizeFontXml));
+    std::shared_ptr<FontCollection> collection(getFontCollection(kTestFontDir, kItemizeFontXml));
 
     for (auto testCase : testCases) {
         SCOPED_TRACE("Test for \"" + testCase.testString + "\" with languages " +
@@ -1144,21 +1150,21 @@
         std::vector<FontCollection::Run> runs;
         const FontStyle style =
                 FontStyle(FontStyle::registerLanguageList(testCase.requestedLanguages));
-        itemize(collection.get(), testCase.testString.c_str(), style, &runs);
+        itemize(collection, testCase.testString.c_str(), style, &runs);
         ASSERT_EQ(1U, runs.size());
         EXPECT_EQ(testCase.expectedFont, getFontPath(runs[0]));
     }
 }
 
 TEST_F(FontCollectionItemizeTest, itemize_emojiSelection_withFE0E) {
-    MinikinAutoUnref<FontCollection> collection(getFontCollection(kTestFontDir, kEmojiXmlFile));
+    std::shared_ptr<FontCollection> collection(getFontCollection(kTestFontDir, kEmojiXmlFile));
     std::vector<FontCollection::Run> runs;
 
     const FontStyle kDefaultFontStyle;
 
     // U+00A9 is a text default emoji which is only available in TextEmojiFont.ttf.
     // TextEmojiFont.ttf should be selected.
-    itemize(collection.get(), "U+00A9 U+FE0E", kDefaultFontStyle, &runs);
+    itemize(collection, "U+00A9 U+FE0E", kDefaultFontStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
@@ -1166,7 +1172,7 @@
 
     // U+00A9 is a text default emoji which is only available in ColorEmojiFont.ttf.
     // ColorEmojiFont.ttf should be selected.
-    itemize(collection.get(), "U+00AE U+FE0E", kDefaultFontStyle, &runs);
+    itemize(collection, "U+00AE U+FE0E", kDefaultFontStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
@@ -1175,7 +1181,7 @@
 
     // U+203C is a text default emoji which is available in both TextEmojiFont.ttf and
     // ColorEmojiFont.ttf. TextEmojiFont.ttf should be selected.
-    itemize(collection.get(), "U+203C U+FE0E", kDefaultFontStyle, &runs);
+    itemize(collection, "U+203C U+FE0E", kDefaultFontStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
@@ -1183,7 +1189,7 @@
 
     // U+2049 is a text default emoji which is not available either TextEmojiFont.ttf or
     // ColorEmojiFont.ttf. No font should be selected.
-    itemize(collection.get(), "U+2049 U+FE0E", kDefaultFontStyle, &runs);
+    itemize(collection, "U+2049 U+FE0E", kDefaultFontStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
@@ -1191,7 +1197,7 @@
 
     // U+231A is a emoji default emoji which is available only in TextEmojifFont.
     // TextEmojiFont.ttf sohuld be selected.
-    itemize(collection.get(), "U+231A U+FE0E", kDefaultFontStyle, &runs);
+    itemize(collection, "U+231A U+FE0E", kDefaultFontStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
@@ -1199,7 +1205,7 @@
 
     // U+231B is a emoji default emoji which is available only in ColorEmojiFont.ttf.
     // ColorEmojiFont.ttf should be selected.
-    itemize(collection.get(), "U+231B U+FE0E", kDefaultFontStyle, &runs);
+    itemize(collection, "U+231B U+FE0E", kDefaultFontStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
@@ -1209,7 +1215,7 @@
     // U+23E9 is a emoji default emoji which is available in both TextEmojiFont.ttf and
     // ColorEmojiFont.ttf. TextEmojiFont.ttf should be selected even if U+23E9 is emoji default
     // emoji since U+FE0E is appended.
-    itemize(collection.get(), "U+23E9 U+FE0E", kDefaultFontStyle, &runs);
+    itemize(collection, "U+23E9 U+FE0E", kDefaultFontStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
@@ -1217,7 +1223,7 @@
 
     // U+23EA is a emoji default emoji but which is not available in either TextEmojiFont.ttf or
     // ColorEmojiFont.ttf. No font should be selected.
-    itemize(collection.get(), "U+23EA U+FE0E", kDefaultFontStyle, &runs);
+    itemize(collection, "U+23EA U+FE0E", kDefaultFontStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
@@ -1225,7 +1231,7 @@
 
     // U+26FA U+FE0E is specified but ColorTextMixedEmojiFont has a variation sequence U+26F9 U+FE0F
     // in its cmap, so ColorTextMixedEmojiFont should be selected instaed of ColorEmojiFont.
-    itemize(collection.get(), "U+26FA U+FE0E", kDefaultFontStyle, &runs);
+    itemize(collection, "U+26FA U+FE0E", kDefaultFontStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
@@ -1233,14 +1239,14 @@
 }
 
 TEST_F(FontCollectionItemizeTest, itemize_emojiSelection_withFE0F) {
-    MinikinAutoUnref<FontCollection> collection(getFontCollection(kTestFontDir, kEmojiXmlFile));
+    std::shared_ptr<FontCollection> collection(getFontCollection(kTestFontDir, kEmojiXmlFile));
     std::vector<FontCollection::Run> runs;
 
     const FontStyle kDefaultFontStyle;
 
     // U+00A9 is a text default emoji which is available only in TextEmojiFont.ttf.
     // TextEmojiFont.ttf shoudl be selected.
-    itemize(collection.get(), "U+00A9 U+FE0F", kDefaultFontStyle, &runs);
+    itemize(collection, "U+00A9 U+FE0F", kDefaultFontStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
@@ -1249,7 +1255,7 @@
 
     // U+00AE is a text default emoji which is available only in ColorEmojiFont.ttf.
     // ColorEmojiFont.ttf should be selected.
-    itemize(collection.get(), "U+00AE U+FE0F", kDefaultFontStyle, &runs);
+    itemize(collection, "U+00AE U+FE0F", kDefaultFontStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
@@ -1258,7 +1264,7 @@
     // U+203C is a text default emoji which is available in both TextEmojiFont.ttf and
     // ColorEmojiFont.ttf. ColorEmojiFont.ttf should be selected even if U+203C is a text default
     // emoji since U+FF0F is appended.
-    itemize(collection.get(), "U+203C U+FE0F", kDefaultFontStyle, &runs);
+    itemize(collection, "U+203C U+FE0F", kDefaultFontStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
@@ -1266,7 +1272,7 @@
 
     // U+2049 is a text default emoji which is not available in either TextEmojiFont.ttf or
     // ColorEmojiFont.ttf. No font should be selected.
-    itemize(collection.get(), "U+2049 U+FE0F", kDefaultFontStyle, &runs);
+    itemize(collection, "U+2049 U+FE0F", kDefaultFontStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
@@ -1274,7 +1280,7 @@
 
     // U+231A is a emoji default emoji which is available only in TextEmojiFont.ttf.
     // TextEmojiFont.ttf should be selected.
-    itemize(collection.get(), "U+231A U+FE0F", kDefaultFontStyle, &runs);
+    itemize(collection, "U+231A U+FE0F", kDefaultFontStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
@@ -1283,7 +1289,7 @@
 
     // U+231B is a emoji default emoji which is available only in ColorEmojiFont.ttf.
     // ColorEmojiFont.ttf should be selected.
-    itemize(collection.get(), "U+231B U+FE0F", kDefaultFontStyle, &runs);
+    itemize(collection, "U+231B U+FE0F", kDefaultFontStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
@@ -1291,7 +1297,7 @@
 
     // U+23E9 is a emoji default emoji which is available in both TextEmojiFont.ttf and
     // ColorEmojiFont.ttf. ColorEmojiFont.ttf should be selected.
-    itemize(collection.get(), "U+23E9 U+FE0F", kDefaultFontStyle, &runs);
+    itemize(collection, "U+23E9 U+FE0F", kDefaultFontStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
@@ -1299,7 +1305,7 @@
 
     // U+23EA is a emoji default emoji which is not available in either TextEmojiFont.ttf or
     // ColorEmojiFont.ttf. No font should be selected.
-    itemize(collection.get(), "U+23EA U+FE0F", kDefaultFontStyle, &runs);
+    itemize(collection, "U+23EA U+FE0F", kDefaultFontStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
@@ -1307,7 +1313,7 @@
 
     // U+26F9 U+FE0F is specified but ColorTextMixedEmojiFont has a variation sequence U+26F9 U+FE0F
     // in its cmap, so ColorTextMixedEmojiFont should be selected instaed of ColorEmojiFont.
-    itemize(collection.get(), "U+26F9 U+FE0F", kDefaultFontStyle, &runs);
+    itemize(collection, "U+26F9 U+FE0F", kDefaultFontStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
@@ -1315,27 +1321,27 @@
 }
 
 TEST_F(FontCollectionItemizeTest, itemize_emojiSelection_with_skinTone) {
-    MinikinAutoUnref<FontCollection> collection(getFontCollection(kTestFontDir, kEmojiXmlFile));
+    std::shared_ptr<FontCollection> collection(getFontCollection(kTestFontDir, kEmojiXmlFile));
     std::vector<FontCollection::Run> runs;
 
     const FontStyle kDefaultFontStyle;
 
     // TextEmoji font is selected since it is listed before ColorEmoji font.
-    itemize(collection.get(), "U+261D", kDefaultFontStyle, &runs);
+    itemize(collection, "U+261D", kDefaultFontStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(1, runs[0].end);
     EXPECT_EQ(kTextEmojiFont, getFontPath(runs[0]));
 
     // If skin tone is specified, it should be colored.
-    itemize(collection.get(), "U+261D U+1F3FD", kDefaultFontStyle, &runs);
+    itemize(collection, "U+261D U+1F3FD", kDefaultFontStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(3, runs[0].end);
     EXPECT_EQ(kColorEmojiFont, getFontPath(runs[0]));
 
     // Still color font is selected if an emoji variation selector is specified.
-    itemize(collection.get(), "U+261D U+FE0F U+1F3FD", kDefaultFontStyle, &runs);
+    itemize(collection, "U+261D U+FE0F U+1F3FD", kDefaultFontStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(4, runs[0].end);
@@ -1343,7 +1349,7 @@
 
     // Text font should be selected if a text variation selector is specified and skin tone is
     // rendered by itself.
-    itemize(collection.get(), "U+261D U+FE0E U+1F3FD", kDefaultFontStyle, &runs);
+    itemize(collection, "U+261D U+FE0E U+1F3FD", kDefaultFontStyle, &runs);
     ASSERT_EQ(2U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
@@ -1354,19 +1360,19 @@
 }
 
 TEST_F(FontCollectionItemizeTest, itemize_PrivateUseArea) {
-    MinikinAutoUnref<FontCollection> collection(getFontCollection(kTestFontDir, kEmojiXmlFile));
+    std::shared_ptr<FontCollection> collection(getFontCollection(kTestFontDir, kEmojiXmlFile));
     std::vector<FontCollection::Run> runs;
 
     const FontStyle kDefaultFontStyle;
 
     // Should not set nullptr to the result run. (Issue 26808815)
-    itemize(collection.get(), "U+FEE10", kDefaultFontStyle, &runs);
+    itemize(collection, "U+FEE10", kDefaultFontStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(2, runs[0].end);
     EXPECT_EQ(kNoGlyphFont, getFontPath(runs[0]));
 
-    itemize(collection.get(), "U+FEE40 U+FE4C5", kDefaultFontStyle, &runs);
+    itemize(collection, "U+FEE40 U+FE4C5", kDefaultFontStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(4, runs[0].end);
@@ -1374,26 +1380,96 @@
 }
 
 TEST_F(FontCollectionItemizeTest, itemize_genderBalancedEmoji) {
-    MinikinAutoUnref<FontCollection> collection(getFontCollection(kTestFontDir, kEmojiXmlFile));
+    std::shared_ptr<FontCollection> collection(getFontCollection(kTestFontDir, kEmojiXmlFile));
     std::vector<FontCollection::Run> runs;
 
     const FontStyle kDefaultFontStyle;
 
-    itemize(collection.get(), "U+1F469 U+200D U+1F373", kDefaultFontStyle, &runs);
+    itemize(collection, "U+1F469 U+200D U+1F373", kDefaultFontStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(5, runs[0].end);
     EXPECT_EQ(kColorEmojiFont, getFontPath(runs[0]));
 
-    itemize(collection.get(), "U+1F469 U+200D U+2695 U+FE0F", kDefaultFontStyle, &runs);
+    itemize(collection, "U+1F469 U+200D U+2695 U+FE0F", kDefaultFontStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(5, runs[0].end);
     EXPECT_EQ(kColorEmojiFont, getFontPath(runs[0]));
 
-    itemize(collection.get(), "U+1F469 U+200D U+2695", kDefaultFontStyle, &runs);
+    itemize(collection, "U+1F469 U+200D U+2695", kDefaultFontStyle, &runs);
     ASSERT_EQ(1U, runs.size());
     EXPECT_EQ(0, runs[0].start);
     EXPECT_EQ(4, runs[0].end);
     EXPECT_EQ(kColorEmojiFont, getFontPath(runs[0]));
 }
+
+// For b/29585939
+TEST_F(FontCollectionItemizeTest, itemizeShouldKeepOrderForVS) {
+    const FontStyle kDefaultFontStyle;
+
+    std::shared_ptr<MinikinFont> dummyFont(new MinikinFontForTest(kNoGlyphFont));
+    std::shared_ptr<MinikinFont> fontA(new MinikinFontForTest(kZH_HansFont));
+    std::shared_ptr<MinikinFont> fontB(new MinikinFontForTest(kZH_HansFont));
+
+    std::shared_ptr<FontFamily> dummyFamily(new FontFamily(
+            std::vector<Font>({ Font(dummyFont, FontStyle()) })));
+    std::shared_ptr<FontFamily> familyA(new FontFamily(
+            std::vector<Font>({ Font(fontA, FontStyle()) })));
+    std::shared_ptr<FontFamily> familyB(new FontFamily(
+            std::vector<Font>({ Font(fontB, FontStyle()) })));
+
+    std::vector<std::shared_ptr<FontFamily>> families =
+            { dummyFamily, familyA, familyB };
+    std::vector<std::shared_ptr<FontFamily>> reversedFamilies =
+            { dummyFamily, familyB, familyA };
+
+    std::shared_ptr<FontCollection> collection(new FontCollection(families));
+    std::shared_ptr<FontCollection> reversedCollection(new FontCollection(reversedFamilies));
+
+    // Both fontA/fontB support U+35A8 but don't support U+35A8 U+E0100. The first font should be
+    // selected.
+    std::vector<FontCollection::Run> runs;
+    itemize(collection, "U+35A8 U+E0100", kDefaultFontStyle, &runs);
+    EXPECT_EQ(fontA.get(), runs[0].fakedFont.font);
+
+    itemize(reversedCollection, "U+35A8 U+E0100", kDefaultFontStyle, &runs);
+    EXPECT_EQ(fontB.get(), runs[0].fakedFont.font);
+}
+
+// For b/29585939
+TEST_F(FontCollectionItemizeTest, itemizeShouldKeepOrderForVS2) {
+    const FontStyle kDefaultFontStyle;
+
+    std::shared_ptr<MinikinFont> dummyFont(new MinikinFontForTest(kNoGlyphFont));
+    std::shared_ptr<MinikinFont> hasCmapFormat14Font(
+            new MinikinFontForTest(kHasCmapFormat14Font));
+    std::shared_ptr<MinikinFont> noCmapFormat14Font(
+            new MinikinFontForTest(kNoCmapFormat14Font));
+
+    std::shared_ptr<FontFamily> dummyFamily(new FontFamily(
+            std::vector<Font>({ Font(dummyFont, FontStyle()) })));
+    std::shared_ptr<FontFamily> hasCmapFormat14Family(new FontFamily(
+            std::vector<Font>({ Font(hasCmapFormat14Font, FontStyle()) })));
+    std::shared_ptr<FontFamily> noCmapFormat14Family(new FontFamily(
+            std::vector<Font>({ Font(noCmapFormat14Font, FontStyle()) })));
+
+    std::vector<std::shared_ptr<FontFamily>> families =
+            { dummyFamily, hasCmapFormat14Family, noCmapFormat14Family };
+    std::vector<std::shared_ptr<FontFamily>> reversedFamilies =
+            { dummyFamily, noCmapFormat14Family, hasCmapFormat14Family };
+
+    std::shared_ptr<FontCollection> collection(new FontCollection(families));
+    std::shared_ptr<FontCollection> reversedCollection(new FontCollection(reversedFamilies));
+
+    // Both hasCmapFormat14Font/noCmapFormat14Font support U+5380 but don't support U+5380 U+E0100.
+    // The first font should be selected.
+    std::vector<FontCollection::Run> runs;
+    itemize(collection, "U+5380 U+E0100", kDefaultFontStyle, &runs);
+    EXPECT_EQ(hasCmapFormat14Font.get(), runs[0].fakedFont.font);
+
+    itemize(reversedCollection, "U+5380 U+E0100", kDefaultFontStyle, &runs);
+    EXPECT_EQ(noCmapFormat14Font.get(), runs[0].fakedFont.font);
+}
+
+}  // namespace minikin

diff --git a/tests/unittest/FontCollectionTest.cpp b/tests/unittest/FontCollectionTest.cpp
new file mode 100644
index 0000000..bef1c63
--- /dev/null
+++ b/tests/unittest/FontCollectionTest.cpp

@@ -0,0 +1,199 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <minikin/FontCollection.h>
+#include "FontTestUtils.h"
+#include "MinikinFontForTest.h"
+#include "MinikinInternal.h"
+
+namespace minikin {
+
+// The test font has the following glyphs.
+// U+82A6
+// U+82A6 U+FE00 (VS1)
+// U+82A6 U+E0100 (VS17)
+// U+82A6 U+E0101 (VS18)
+// U+82A6 U+E0102 (VS19)
+// U+845B
+// U+845B U+FE01 (VS2)
+// U+845B U+E0101 (VS18)
+// U+845B U+E0102 (VS19)
+// U+845B U+E0103 (VS20)
+// U+537F
+// U+717D U+FE02 (VS3)
+// U+717D U+E0102 (VS19)
+// U+717D U+E0103 (VS20)
+const char kVsTestFont[] = kTestFontDir "/VariationSelectorTest-Regular.ttf";
+
+void expectVSGlyphs(const FontCollection* fc, uint32_t codepoint, const std::set<uint32_t>& vsSet) {
+    for (uint32_t vs = 0xFE00; vs <= 0xE01EF; ++vs) {
+        // Move to variation selectors supplements after variation selectors.
+        if (vs == 0xFF00) {
+            vs = 0xE0100;
+        }
+        if (vsSet.find(vs) == vsSet.end()) {
+            EXPECT_FALSE(fc->hasVariationSelector(codepoint, vs))
+                << "Glyph for U+" << std::hex << codepoint << " U+" << vs;
+        } else {
+            EXPECT_TRUE(fc->hasVariationSelector(codepoint, vs))
+                << "Glyph for U+" << std::hex << codepoint << " U+" << vs;
+        }
+    }
+}
+
+TEST(FontCollectionTest, hasVariationSelectorTest) {
+  std::shared_ptr<MinikinFont> font(new MinikinFontForTest(kVsTestFont));
+  std::shared_ptr<FontFamily> family(new FontFamily(
+          std::vector<Font>({ Font(font, FontStyle()) })));
+  std::vector<std::shared_ptr<FontFamily>> families({ family });
+  std::shared_ptr<FontCollection> fc(new FontCollection(families));
+
+  EXPECT_FALSE(fc->hasVariationSelector(0x82A6, 0));
+  expectVSGlyphs(fc.get(), 0x82A6, std::set<uint32_t>({0xFE00, 0xFE0E, 0xE0100, 0xE0101, 0xE0102}));
+
+  EXPECT_FALSE(fc->hasVariationSelector(0x845B, 0));
+  expectVSGlyphs(fc.get(), 0x845B, std::set<uint32_t>({0xFE01, 0xFE0E, 0xE0101, 0xE0102, 0xE0103}));
+
+  EXPECT_FALSE(fc->hasVariationSelector(0x537F, 0));
+  expectVSGlyphs(fc.get(), 0x537F, std::set<uint32_t>({0xFE0E}));
+
+  EXPECT_FALSE(fc->hasVariationSelector(0x717D, 0));
+  expectVSGlyphs(fc.get(), 0x717D, std::set<uint32_t>({0xFE02, 0xE0102, 0xE0103}));
+}
+
+const char kEmojiXmlFile[] = kTestFontDir "emoji.xml";
+
+TEST(FontCollectionTest, hasVariationSelectorTest_emoji) {
+    std::shared_ptr<FontCollection> collection(getFontCollection(kTestFontDir, kEmojiXmlFile));
+
+    // Both text/color font have cmap format 14 subtable entry for VS15/VS16 respectively.
+    EXPECT_TRUE(collection->hasVariationSelector(0x2623, 0xFE0E));
+    EXPECT_TRUE(collection->hasVariationSelector(0x2623, 0xFE0F));
+
+    // The text font has cmap format 14 subtable entry for VS15 but the color font doesn't have for
+    // VS16
+    EXPECT_TRUE(collection->hasVariationSelector(0x2626, 0xFE0E));
+    EXPECT_FALSE(collection->hasVariationSelector(0x2626, 0xFE0F));
+
+    // The color font has cmap format 14 subtable entry for VS16 but the text font doesn't have for
+    // VS15.
+    EXPECT_TRUE(collection->hasVariationSelector(0x262A, 0xFE0E));
+    EXPECT_TRUE(collection->hasVariationSelector(0x262A, 0xFE0F));
+
+    // Neither text/color font have cmap format 14 subtable entry for VS15/VS16.
+    EXPECT_TRUE(collection->hasVariationSelector(0x262E, 0xFE0E));
+    EXPECT_FALSE(collection->hasVariationSelector(0x262E, 0xFE0F));
+
+    // Text font doesn't support U+1F3FD. Only the color emoji font does. So VS15 is not supported.
+    EXPECT_FALSE(collection->hasVariationSelector(0x1F3FD, 0xFE0E));
+
+    // Text font doesn't have U+262F U+FE0E or even its base code point U+262F.
+    EXPECT_FALSE(collection->hasVariationSelector(0x262F, 0xFE0E));
+
+    // None of the fonts support U+2229.
+    EXPECT_FALSE(collection->hasVariationSelector(0x2229, 0xFE0E));
+    EXPECT_FALSE(collection->hasVariationSelector(0x2229, 0xFE0F));
+
+}
+
+TEST(FontCollectionTest, newEmojiTest) {
+    std::shared_ptr<FontCollection> collection(getFontCollection(kTestFontDir, kEmojiXmlFile));
+
+    // U+2695, U+2640, U+2642 are not in emoji category in Unicode 9 but they are now in emoji
+    // category. Should return true even if U+FE0E was appended.
+    // These three emojis are only available in TextEmoji.ttf but U+2695 is excluded here since it
+    // is used in other tests.
+    EXPECT_TRUE(collection->hasVariationSelector(0x2640, 0xFE0E));
+    EXPECT_FALSE(collection->hasVariationSelector(0x2640, 0xFE0F));
+    EXPECT_TRUE(collection->hasVariationSelector(0x2642, 0xFE0E));
+    EXPECT_FALSE(collection->hasVariationSelector(0x2642, 0xFE0F));
+}
+
+TEST(FontCollectionTest, createWithVariations) {
+    // This font has 'wdth' and 'wght' axes.
+    const char kMultiAxisFont[] = kTestFontDir "/MultiAxis.ttf";
+    const char kNoAxisFont[] = kTestFontDir "/Regular.ttf";
+
+    std::shared_ptr<MinikinFont> multiAxisFont(new MinikinFontForTest(kMultiAxisFont));
+    std::shared_ptr<FontFamily> multiAxisFamily(new FontFamily(
+            std::vector<Font>({ Font(multiAxisFont, FontStyle()) })));
+    std::vector<std::shared_ptr<FontFamily>> multiAxisFamilies({multiAxisFamily});
+    std::shared_ptr<FontCollection> multiAxisFc(new FontCollection(multiAxisFamilies));
+
+    std::shared_ptr<MinikinFont> noAxisFont(new MinikinFontForTest(kNoAxisFont));
+    std::shared_ptr<FontFamily> noAxisFamily(new FontFamily(
+            std::vector<Font>({ Font(noAxisFont, FontStyle()) })));
+    std::vector<std::shared_ptr<FontFamily>> noAxisFamilies({noAxisFamily});
+    std::shared_ptr<FontCollection> noAxisFc(new FontCollection(noAxisFamilies));
+
+    {
+        // Do not create new instance if none of variations are specified.
+        EXPECT_EQ(nullptr,
+                multiAxisFc->createCollectionWithVariation(std::vector<FontVariation>()));
+        EXPECT_EQ(nullptr,
+                noAxisFc->createCollectionWithVariation(std::vector<FontVariation>()));
+    }
+    {
+        // New instance should be used for supported variation.
+        std::vector<FontVariation> variations = {
+                { MinikinFont::MakeTag('w', 'd', 't', 'h'), 1.0f }
+        };
+        std::shared_ptr<FontCollection> newFc(
+                multiAxisFc->createCollectionWithVariation(variations));
+        EXPECT_NE(nullptr, newFc.get());
+        EXPECT_NE(multiAxisFc.get(), newFc.get());
+
+        EXPECT_EQ(nullptr, noAxisFc->createCollectionWithVariation(variations));
+    }
+    {
+        // New instance should be used for supported variation (multiple variations case).
+        std::vector<FontVariation> variations = {
+                { MinikinFont::MakeTag('w', 'd', 't', 'h'), 1.0f },
+                { MinikinFont::MakeTag('w', 'g', 'h', 't'), 1.0f }
+        };
+        std::shared_ptr<FontCollection> newFc(
+                multiAxisFc->createCollectionWithVariation(variations));
+        EXPECT_NE(nullptr, newFc.get());
+        EXPECT_NE(multiAxisFc.get(), newFc.get());
+
+        EXPECT_EQ(nullptr, noAxisFc->createCollectionWithVariation(variations));
+    }
+    {
+        // Do not create new instance if none of variations are supported.
+        std::vector<FontVariation> variations = {
+                { MinikinFont::MakeTag('Z', 'Z', 'Z', 'Z'), 1.0f }
+        };
+        EXPECT_EQ(nullptr, multiAxisFc->createCollectionWithVariation(variations));
+        EXPECT_EQ(nullptr, noAxisFc->createCollectionWithVariation(variations));
+    }
+    {
+        // At least one axis is supported, should create new instance.
+        std::vector<FontVariation> variations = {
+                { MinikinFont::MakeTag('w', 'd', 't', 'h'), 1.0f },
+                { MinikinFont::MakeTag('Z', 'Z', 'Z', 'Z'), 1.0f }
+        };
+        std::shared_ptr<FontCollection> newFc(
+                multiAxisFc->createCollectionWithVariation(variations));
+        EXPECT_NE(nullptr, newFc.get());
+        EXPECT_NE(multiAxisFc.get(), newFc.get());
+
+        EXPECT_EQ(nullptr, noAxisFc->createCollectionWithVariation(variations));
+    }
+}
+
+}  // namespace minikin

diff --git a/tests/unittest/FontFamilyTest.cpp b/tests/unittest/FontFamilyTest.cpp
new file mode 100644
index 0000000..90e2a64
--- /dev/null
+++ b/tests/unittest/FontFamilyTest.cpp

@@ -0,0 +1,682 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <minikin/FontFamily.h>
+
+#include <android/log.h>
+#include <gtest/gtest.h>
+
+#include "FontLanguageListCache.h"
+#include "ICUTestBase.h"
+#include "MinikinFontForTest.h"
+#include "MinikinInternal.h"
+
+namespace minikin {
+
+typedef ICUTestBase FontLanguagesTest;
+typedef ICUTestBase FontLanguageTest;
+
+static const FontLanguages& createFontLanguages(const std::string& input) {
+    android::AutoMutex _l(gMinikinLock);
+    uint32_t langId = FontLanguageListCache::getId(input);
+    return FontLanguageListCache::getById(langId);
+}
+
+static FontLanguage createFontLanguage(const std::string& input) {
+    android::AutoMutex _l(gMinikinLock);
+    uint32_t langId = FontLanguageListCache::getId(input);
+    return FontLanguageListCache::getById(langId)[0];
+}
+
+static FontLanguage createFontLanguageWithoutICUSanitization(const std::string& input) {
+    return FontLanguage(input.c_str(), input.size());
+}
+
+std::shared_ptr<FontFamily> makeFamily(const std::string& fontPath) {
+    std::shared_ptr<MinikinFont> font(new MinikinFontForTest(fontPath));
+    return std::make_shared<FontFamily>(
+            std::vector<Font>({Font(font, FontStyle())}));
+}
+
+TEST_F(FontLanguageTest, basicTests) {
+    FontLanguage defaultLang;
+    FontLanguage emptyLang("", 0);
+    FontLanguage english = createFontLanguage("en");
+    FontLanguage french = createFontLanguage("fr");
+    FontLanguage und = createFontLanguage("und");
+    FontLanguage undZsye = createFontLanguage("und-Zsye");
+
+    EXPECT_EQ(english, english);
+    EXPECT_EQ(french, french);
+
+    EXPECT_TRUE(defaultLang != defaultLang);
+    EXPECT_TRUE(emptyLang != emptyLang);
+    EXPECT_TRUE(defaultLang != emptyLang);
+    EXPECT_TRUE(defaultLang != und);
+    EXPECT_TRUE(emptyLang != und);
+    EXPECT_TRUE(english != defaultLang);
+    EXPECT_TRUE(english != emptyLang);
+    EXPECT_TRUE(english != french);
+    EXPECT_TRUE(english != undZsye);
+    EXPECT_TRUE(und != undZsye);
+    EXPECT_TRUE(english != und);
+
+    EXPECT_TRUE(defaultLang.isUnsupported());
+    EXPECT_TRUE(emptyLang.isUnsupported());
+
+    EXPECT_FALSE(english.isUnsupported());
+    EXPECT_FALSE(french.isUnsupported());
+    EXPECT_FALSE(und.isUnsupported());
+    EXPECT_FALSE(undZsye.isUnsupported());
+}
+
+TEST_F(FontLanguageTest, getStringTest) {
+    EXPECT_EQ("en-Latn-US", createFontLanguage("en").getString());
+    EXPECT_EQ("en-Latn-US", createFontLanguage("en-Latn").getString());
+
+    // Capitalized language code or lowercased script should be normalized.
+    EXPECT_EQ("en-Latn-US", createFontLanguage("EN-LATN").getString());
+    EXPECT_EQ("en-Latn-US", createFontLanguage("EN-latn").getString());
+    EXPECT_EQ("en-Latn-US", createFontLanguage("en-latn").getString());
+
+    // Invalid script should be kept.
+    EXPECT_EQ("en-Xyzt-US", createFontLanguage("en-xyzt").getString());
+
+    EXPECT_EQ("en-Latn-US", createFontLanguage("en-Latn-US").getString());
+    EXPECT_EQ("ja-Jpan-JP", createFontLanguage("ja").getString());
+    EXPECT_EQ("zh-Hant-TW", createFontLanguage("zh-TW").getString());
+    EXPECT_EQ("zh-Hant-HK", createFontLanguage("zh-HK").getString());
+    EXPECT_EQ("zh-Hant-MO", createFontLanguage("zh-MO").getString());
+    EXPECT_EQ("zh-Hans-CN", createFontLanguage("zh").getString());
+    EXPECT_EQ("zh-Hans-CN", createFontLanguage("zh-CN").getString());
+    EXPECT_EQ("zh-Hans-SG", createFontLanguage("zh-SG").getString());
+    EXPECT_EQ("und", createFontLanguage("und").getString());
+    EXPECT_EQ("und", createFontLanguage("UND").getString());
+    EXPECT_EQ("und", createFontLanguage("Und").getString());
+    EXPECT_EQ("und-Zsye", createFontLanguage("und-Zsye").getString());
+    EXPECT_EQ("und-Zsye", createFontLanguage("Und-ZSYE").getString());
+    EXPECT_EQ("und-Zsye", createFontLanguage("Und-zsye").getString());
+
+    EXPECT_EQ("de-Latn-DE", createFontLanguage("de-1901").getString());
+
+    EXPECT_EQ("es-Latn-419", createFontLanguage("es-Latn-419").getString());
+
+    // Emoji subtag is dropped from getString().
+    EXPECT_EQ("es-Latn-419", createFontLanguage("es-419-u-em-emoji").getString());
+    EXPECT_EQ("es-Latn-419", createFontLanguage("es-Latn-419-u-em-emoji").getString());
+
+    // This is not necessarily the desired behavior, just the known behavior.
+    EXPECT_EQ("en-Latn-US", createFontLanguage("und-Abcdefgh").getString());
+}
+
+TEST_F(FontLanguageTest, testReconstruction) {
+    EXPECT_EQ("en", createFontLanguageWithoutICUSanitization("en").getString());
+    EXPECT_EQ("fil", createFontLanguageWithoutICUSanitization("fil").getString());
+    EXPECT_EQ("und", createFontLanguageWithoutICUSanitization("und").getString());
+
+    EXPECT_EQ("en-Latn", createFontLanguageWithoutICUSanitization("en-Latn").getString());
+    EXPECT_EQ("fil-Taga", createFontLanguageWithoutICUSanitization("fil-Taga").getString());
+    EXPECT_EQ("und-Zsye", createFontLanguageWithoutICUSanitization("und-Zsye").getString());
+
+    EXPECT_EQ("en-US", createFontLanguageWithoutICUSanitization("en-US").getString());
+    EXPECT_EQ("fil-PH", createFontLanguageWithoutICUSanitization("fil-PH").getString());
+    EXPECT_EQ("es-419", createFontLanguageWithoutICUSanitization("es-419").getString());
+
+    EXPECT_EQ("en-Latn-US", createFontLanguageWithoutICUSanitization("en-Latn-US").getString());
+    EXPECT_EQ("fil-Taga-PH", createFontLanguageWithoutICUSanitization("fil-Taga-PH").getString());
+    EXPECT_EQ("es-Latn-419", createFontLanguageWithoutICUSanitization("es-Latn-419").getString());
+
+    // Possible minimum/maximum values.
+    EXPECT_EQ("aa", createFontLanguageWithoutICUSanitization("aa").getString());
+    EXPECT_EQ("zz", createFontLanguageWithoutICUSanitization("zz").getString());
+    EXPECT_EQ("aa-Aaaa", createFontLanguageWithoutICUSanitization("aa-Aaaa").getString());
+    EXPECT_EQ("zz-Zzzz", createFontLanguageWithoutICUSanitization("zz-Zzzz").getString());
+    EXPECT_EQ("aaa-Aaaa-AA", createFontLanguageWithoutICUSanitization("aaa-Aaaa-AA").getString());
+    EXPECT_EQ("zzz-Zzzz-ZZ", createFontLanguageWithoutICUSanitization("zzz-Zzzz-ZZ").getString());
+    EXPECT_EQ("aaa-Aaaa-000", createFontLanguageWithoutICUSanitization("aaa-Aaaa-000").getString());
+    EXPECT_EQ("zzz-Zzzz-999", createFontLanguageWithoutICUSanitization("zzz-Zzzz-999").getString());
+}
+
+TEST_F(FontLanguageTest, ScriptEqualTest) {
+    EXPECT_TRUE(createFontLanguage("en").isEqualScript(createFontLanguage("en")));
+    EXPECT_TRUE(createFontLanguage("en-Latn").isEqualScript(createFontLanguage("en")));
+    EXPECT_TRUE(createFontLanguage("jp-Latn").isEqualScript(createFontLanguage("en-Latn")));
+    EXPECT_TRUE(createFontLanguage("en-Jpan").isEqualScript(createFontLanguage("en-Jpan")));
+
+    EXPECT_FALSE(createFontLanguage("en-Jpan").isEqualScript(createFontLanguage("en-Hira")));
+    EXPECT_FALSE(createFontLanguage("en-Jpan").isEqualScript(createFontLanguage("en-Hani")));
+}
+
+TEST_F(FontLanguageTest, ScriptMatchTest) {
+    const bool SUPPORTED = true;
+    const bool NOT_SUPPORTED = false;
+
+    struct TestCase {
+        const std::string baseScript;
+        const std::string requestedScript;
+        bool isSupported;
+    } testCases[] = {
+        // Same scripts
+        { "en-Latn", "Latn", SUPPORTED },
+        { "ja-Jpan", "Jpan", SUPPORTED },
+        { "ja-Hira", "Hira", SUPPORTED },
+        { "ja-Kana", "Kana", SUPPORTED },
+        { "ja-Hrkt", "Hrkt", SUPPORTED },
+        { "zh-Hans", "Hans", SUPPORTED },
+        { "zh-Hant", "Hant", SUPPORTED },
+        { "zh-Hani", "Hani", SUPPORTED },
+        { "ko-Kore", "Kore", SUPPORTED },
+        { "ko-Hang", "Hang", SUPPORTED },
+        { "zh-Hanb", "Hanb", SUPPORTED },
+
+        // Japanese supports Hiragana, Katakana, etc.
+        { "ja-Jpan", "Hira", SUPPORTED },
+        { "ja-Jpan", "Kana", SUPPORTED },
+        { "ja-Jpan", "Hrkt", SUPPORTED },
+        { "ja-Hrkt", "Hira", SUPPORTED },
+        { "ja-Hrkt", "Kana", SUPPORTED },
+
+        // Chinese supports Han.
+        { "zh-Hans", "Hani", SUPPORTED },
+        { "zh-Hant", "Hani", SUPPORTED },
+        { "zh-Hanb", "Hani", SUPPORTED },
+
+        // Hanb supports Bopomofo.
+        { "zh-Hanb", "Bopo", SUPPORTED },
+
+        // Korean supports Hangul.
+        { "ko-Kore", "Hang", SUPPORTED },
+
+        // Different scripts
+        { "ja-Jpan", "Latn", NOT_SUPPORTED },
+        { "en-Latn", "Jpan", NOT_SUPPORTED },
+        { "ja-Jpan", "Hant", NOT_SUPPORTED },
+        { "zh-Hant", "Jpan", NOT_SUPPORTED },
+        { "ja-Jpan", "Hans", NOT_SUPPORTED },
+        { "zh-Hans", "Jpan", NOT_SUPPORTED },
+        { "ja-Jpan", "Kore", NOT_SUPPORTED },
+        { "ko-Kore", "Jpan", NOT_SUPPORTED },
+        { "zh-Hans", "Hant", NOT_SUPPORTED },
+        { "zh-Hant", "Hans", NOT_SUPPORTED },
+        { "zh-Hans", "Kore", NOT_SUPPORTED },
+        { "ko-Kore", "Hans", NOT_SUPPORTED },
+        { "zh-Hant", "Kore", NOT_SUPPORTED },
+        { "ko-Kore", "Hant", NOT_SUPPORTED },
+
+        // Hiragana doesn't support Japanese, etc.
+        { "ja-Hira", "Jpan", NOT_SUPPORTED },
+        { "ja-Kana", "Jpan", NOT_SUPPORTED },
+        { "ja-Hrkt", "Jpan", NOT_SUPPORTED },
+        { "ja-Hani", "Jpan", NOT_SUPPORTED },
+        { "ja-Hira", "Hrkt", NOT_SUPPORTED },
+        { "ja-Kana", "Hrkt", NOT_SUPPORTED },
+        { "ja-Hani", "Hrkt", NOT_SUPPORTED },
+        { "ja-Hani", "Hira", NOT_SUPPORTED },
+        { "ja-Hani", "Kana", NOT_SUPPORTED },
+
+        // Kanji doesn't support Chinese, etc.
+        { "zh-Hani", "Hant", NOT_SUPPORTED },
+        { "zh-Hani", "Hans", NOT_SUPPORTED },
+        { "zh-Hani", "Hanb", NOT_SUPPORTED },
+
+        // Hangul doesn't support Korean, etc.
+        { "ko-Hang", "Kore", NOT_SUPPORTED },
+        { "ko-Hani", "Kore", NOT_SUPPORTED },
+        { "ko-Hani", "Hang", NOT_SUPPORTED },
+        { "ko-Hang", "Hani", NOT_SUPPORTED },
+
+        // Han with Bopomofo doesn't support simplified Chinese, etc.
+        { "zh-Hanb", "Hant", NOT_SUPPORTED },
+        { "zh-Hanb", "Hans", NOT_SUPPORTED },
+        { "zh-Hanb", "Jpan", NOT_SUPPORTED },
+        { "zh-Hanb", "Kore", NOT_SUPPORTED },
+    };
+
+    // Each row is checked through FontLanguage::supportsHbScript() with the requested script.
+    // Iterate by const reference: every TestCase owns two std::string members.
+    for (const auto& testCase : testCases) {
+        hb_script_t script = hb_script_from_iso15924_tag(
+                HB_TAG(testCase.requestedScript[0], testCase.requestedScript[1],
+                       testCase.requestedScript[2], testCase.requestedScript[3]));
+        if (testCase.isSupported) {
+            EXPECT_TRUE(createFontLanguage(testCase.baseScript).supportsHbScript(script))
+                    << testCase.baseScript << " should support " << testCase.requestedScript;
+        } else {
+            EXPECT_FALSE(createFontLanguage(testCase.baseScript).supportsHbScript(script))
+                    << testCase.baseScript << " shouldn't support " << testCase.requestedScript;
+        }
+    }
+}
+
+TEST_F(FontLanguagesTest, basicTests) {
+    FontLanguages emptyLangs;
+    EXPECT_EQ(0u, emptyLangs.size());
+
+    FontLanguage english = createFontLanguage("en");
+    const FontLanguages& singletonLangs = createFontLanguages("en");
+    EXPECT_EQ(1u, singletonLangs.size());
+    EXPECT_EQ(english, singletonLangs[0]);
+
+    FontLanguage french = createFontLanguage("fr");
+    const FontLanguages& twoLangs = createFontLanguages("en,fr");
+    EXPECT_EQ(2u, twoLangs.size());
+    EXPECT_EQ(english, twoLangs[0]);
+    EXPECT_EQ(french, twoLangs[1]);
+}
+
+TEST_F(FontLanguagesTest, unsupportedLanguageTests) {
+    const FontLanguages& oneUnsupported = createFontLanguages("abcd-example");
+    EXPECT_TRUE(oneUnsupported.empty());
+
+    const FontLanguages& twoUnsupporteds = createFontLanguages("abcd-example,abcd-example");
+    EXPECT_TRUE(twoUnsupporteds.empty());
+
+    FontLanguage english = createFontLanguage("en");
+    const FontLanguages& firstUnsupported = createFontLanguages("abcd-example,en");
+    EXPECT_EQ(1u, firstUnsupported.size());
+    EXPECT_EQ(english, firstUnsupported[0]);
+
+    const FontLanguages& lastUnsupported = createFontLanguages("en,abcd-example");
+    EXPECT_EQ(1u, lastUnsupported.size());
+    EXPECT_EQ(english, lastUnsupported[0]);
+}
+
+TEST_F(FontLanguagesTest, repeatedLanguageTests) {
+    FontLanguage english = createFontLanguage("en");
+    FontLanguage french = createFontLanguage("fr");
+    FontLanguage canadianFrench = createFontLanguage("fr-CA");
+    FontLanguage englishInLatn = createFontLanguage("en-Latn");
+    ASSERT_TRUE(english == englishInLatn);
+
+    const FontLanguages& langs = createFontLanguages("en,en-Latn");
+    EXPECT_EQ(1u, langs.size());
+    EXPECT_EQ(english, langs[0]);
+
+    const FontLanguages& fr = createFontLanguages("fr,fr-FR,fr-Latn-FR");
+    EXPECT_EQ(1u, fr.size());
+    EXPECT_EQ(french, fr[0]);
+
+    // ICU appends FR to fr. The third language is dropped because it is the same as the first.
+    const FontLanguages& fr2 = createFontLanguages("fr,fr-CA,fr-FR");
+    EXPECT_EQ(2u, fr2.size());
+    EXPECT_EQ(french, fr2[0]);
+    EXPECT_EQ(canadianFrench, fr2[1]);
+
+    // The order should be kept.
+    const FontLanguages& langs2 = createFontLanguages("en,fr,en-Latn");
+    EXPECT_EQ(2u, langs2.size());
+    EXPECT_EQ(english, langs2[0]);
+    EXPECT_EQ(french, langs2[1]);
+}
+
+TEST_F(FontLanguagesTest, identifierTest) {
+    EXPECT_EQ(createFontLanguage("en-Latn-US"), createFontLanguage("en-Latn-US"));
+    EXPECT_EQ(createFontLanguage("zh-Hans-CN"), createFontLanguage("zh-Hans-CN"));
+    EXPECT_EQ(createFontLanguage("en-Zsye-US"), createFontLanguage("en-Zsye-US"));
+
+    EXPECT_NE(createFontLanguage("en-Latn-US"), createFontLanguage("en-Latn-GB"));
+    EXPECT_NE(createFontLanguage("en-Latn-US"), createFontLanguage("en-Zsye-US"));
+    EXPECT_NE(createFontLanguage("es-Latn-US"), createFontLanguage("en-Latn-US"));
+    EXPECT_NE(createFontLanguage("zh-Hant-HK"), createFontLanguage("zh-Hant-TW"));
+}
+
+TEST_F(FontLanguagesTest, undEmojiTests) {
+    FontLanguage emoji = createFontLanguage("und-Zsye");
+    EXPECT_EQ(FontLanguage::EMSTYLE_EMOJI, emoji.getEmojiStyle());
+
+    FontLanguage und = createFontLanguage("und");
+    EXPECT_EQ(FontLanguage::EMSTYLE_EMPTY, und.getEmojiStyle());
+    EXPECT_FALSE(emoji == und);
+
+    FontLanguage undExample = createFontLanguage("und-example");
+    EXPECT_EQ(FontLanguage::EMSTYLE_EMPTY, undExample.getEmojiStyle());
+    EXPECT_FALSE(emoji == undExample);
+}
+
+TEST_F(FontLanguagesTest, subtagEmojiTest) {
+    std::string subtagEmojiStrings[] = {
+        // Duplicate subtag case: the first "em" value is the one expected to take effect.
+        "und-Latn-u-em-emoji-u-em-text",
+
+        // Strings that contain language.
+        "und-u-em-emoji",
+        "en-u-em-emoji",
+
+        // Strings that contain the script.
+        "und-Jpan-u-em-emoji",
+        "en-Latn-u-em-emoji",
+        "und-Zsym-u-em-emoji",
+        "und-Zsye-u-em-emoji",
+        "en-Zsym-u-em-emoji",
+        "en-Zsye-u-em-emoji",
+
+        // Strings that contain the region.
+        "und-US-u-em-emoji",
+        "en-US-u-em-emoji",
+        "es-419-u-em-emoji",
+        "und-Latn-US-u-em-emoji",
+        "en-Zsym-US-u-em-emoji",
+        "en-Zsye-US-u-em-emoji",
+        "es-Zsye-419-u-em-emoji",
+    };
+
+    for (const auto& subtagEmojiString : subtagEmojiStrings) {
+        SCOPED_TRACE("Test for \"" + subtagEmojiString + "\"");
+        FontLanguage subtagEmoji = createFontLanguage(subtagEmojiString);
+        EXPECT_EQ(FontLanguage::EMSTYLE_EMOJI, subtagEmoji.getEmojiStyle());
+    }
+}
+
+TEST_F(FontLanguagesTest, subtagTextTest) {
+    std::string subtagTextStrings[] = {
+        // Duplicate subtag case: the first "em" value is the one expected to take effect.
+        "und-Latn-u-em-text-u-em-emoji",
+
+        // Strings that contain language.
+        "und-u-em-text",
+        "en-u-em-text",
+
+        // Strings that contain the script.
+        "und-Latn-u-em-text",
+        "en-Jpan-u-em-text",
+        "und-Zsym-u-em-text",
+        "und-Zsye-u-em-text",
+        "en-Zsym-u-em-text",
+        "en-Zsye-u-em-text",
+
+        // Strings that contain the region.
+        "und-US-u-em-text",
+        "en-US-u-em-text",
+        "es-419-u-em-text",
+        "und-Latn-US-u-em-text",
+        "en-Zsym-US-u-em-text",
+        "en-Zsye-US-u-em-text",
+        "es-Zsye-419-u-em-text",
+    };
+
+    for (const auto& subtagTextString : subtagTextStrings) {
+        SCOPED_TRACE("Test for \"" + subtagTextString + "\"");
+        FontLanguage subtagText = createFontLanguage(subtagTextString);
+        EXPECT_EQ(FontLanguage::EMSTYLE_TEXT, subtagText.getEmojiStyle());
+    }
+}
+
+// TODO: add more "und" language cases whose language and script are
+//       unexpectedly translated to en-Latn by ICU.
+TEST_F(FontLanguagesTest, subtagDefaultTest) {
+    std::string subtagDefaultStrings[] = {
+        // Duplicate subtag case: the first "em" value is the one expected to take effect.
+        "en-Latn-u-em-default-u-em-emoji",
+        "en-Latn-u-em-default-u-em-text",
+
+        // Strings that contain language.
+        "und-u-em-default",
+        "en-u-em-default",
+
+        // Strings that contain the script.
+        "en-Latn-u-em-default",
+        "en-Zsym-u-em-default",
+        "en-Zsye-u-em-default",
+
+        // Strings that contain the region.
+        "en-US-u-em-default",
+        "en-Latn-US-u-em-default",
+        "es-Latn-419-u-em-default",
+        "en-Zsym-US-u-em-default",
+        "en-Zsye-US-u-em-default",
+        "es-Zsye-419-u-em-default",
+    };
+
+    for (const auto& subtagDefaultString : subtagDefaultStrings) {
+        SCOPED_TRACE("Test for \"" + subtagDefaultString + "\"");
+        FontLanguage subtagDefault = createFontLanguage(subtagDefaultString);
+        EXPECT_EQ(FontLanguage::EMSTYLE_DEFAULT, subtagDefault.getEmojiStyle());
+    }
+}
+
+TEST_F(FontLanguagesTest, subtagEmptyTest) {
+    std::string subtagEmptyStrings[] = {
+        "und",
+        "jp",
+        "en-US",
+        "en-Latn",
+        "en-Latn-US",
+        "en-Latn-US-u-em",  // "em" key with no value
+        "en-Latn-US-u-em-defaultemoji",  // value is none of emoji/text/default
+    };
+
+    for (const auto& subtagEmptyString : subtagEmptyStrings) {
+        SCOPED_TRACE("Test for \"" + subtagEmptyString + "\"");
+        FontLanguage subtagEmpty = createFontLanguage(subtagEmptyString);
+        EXPECT_EQ(FontLanguage::EMSTYLE_EMPTY, subtagEmpty.getEmojiStyle());
+    }
+}
+
+TEST_F(FontLanguagesTest, registerLanguageListTest) {
+    EXPECT_EQ(0UL, FontStyle::registerLanguageList(""));
+    EXPECT_NE(0UL, FontStyle::registerLanguageList("en"));
+    EXPECT_NE(0UL, FontStyle::registerLanguageList("jp"));
+    EXPECT_NE(0UL, FontStyle::registerLanguageList("en,zh-Hans"));
+
+    EXPECT_EQ(FontStyle::registerLanguageList("en"), FontStyle::registerLanguageList("en"));
+    EXPECT_NE(FontStyle::registerLanguageList("en"), FontStyle::registerLanguageList("jp"));
+
+    EXPECT_EQ(FontStyle::registerLanguageList("en,zh-Hans"),
+              FontStyle::registerLanguageList("en,zh-Hans"));
+    EXPECT_NE(FontStyle::registerLanguageList("en,zh-Hans"),
+              FontStyle::registerLanguageList("zh-Hans,en"));
+    EXPECT_NE(FontStyle::registerLanguageList("en,zh-Hans"),
+              FontStyle::registerLanguageList("jp"));
+    EXPECT_NE(FontStyle::registerLanguageList("en,zh-Hans"),
+              FontStyle::registerLanguageList("en"));
+    EXPECT_NE(FontStyle::registerLanguageList("en,zh-Hans"),
+              FontStyle::registerLanguageList("en,zh-Hant"));
+}
+
+// The test font has following glyphs.
+// U+82A6
+// U+82A6 U+FE00 (VS1)
+// U+82A6 U+E0100 (VS17)
+// U+82A6 U+E0101 (VS18)
+// U+82A6 U+E0102 (VS19)
+// U+845B
+// U+845B U+FE01 (VS2)
+// U+845B U+E0101 (VS18)
+// U+845B U+E0102 (VS19)
+// U+845B U+E0103 (VS20)
+// U+537F
+// U+717D U+FE02 (VS3)
+// U+717D U+E0102 (VS19)
+// U+717D U+E0103 (VS20)
+const char kVsTestFont[] = kTestFontDir "VariationSelectorTest-Regular.ttf";
+
+class FontFamilyTest : public ICUTestBase {
+public:
+    virtual void SetUp() override {
+        ICUTestBase::SetUp();
+        if (access(kVsTestFont, R_OK) != 0) {
+            FAIL() << "Unable to read " << kVsTestFont << ". "
+                   << "Please prepare the test data directory. "
+                   << "For more details, please see how_to_run.txt.";
+        }
+    }
+};
+
+// Asserts that, for the given codepoint, the font family has a glyph for exactly the variation
+// selectors listed in vs and for no other variation selector.
+void expectVSGlyphs(FontFamily* family, uint32_t codepoint, const std::set<uint32_t>& vs) {
+    for (uint32_t i = 0xFE00; i <= 0xE01EF; ++i) {
+        // Variation Selectors end at U+FE0F; jump to Variation Selectors Supplement (U+E0100).
+        if (i == 0xFE10) {
+            i = 0xE0100;
+        }
+        // A glyph must be reported if and only if the selector is listed in vs.
+        if (vs.find(i) == vs.end()) {
+            EXPECT_FALSE(family->hasGlyph(codepoint, i))
+                    << "Glyph for U+" << std::hex << codepoint << " U+" << i;
+        } else {
+            EXPECT_TRUE(family->hasGlyph(codepoint, i))
+                    << "Glyph for U+" << std::hex << codepoint << " U+" << i;
+        }
+    }
+}
+
+TEST_F(FontFamilyTest, hasVariationSelectorTest) {
+    std::shared_ptr<MinikinFont> minikinFont(new MinikinFontForTest(kVsTestFont));
+    std::shared_ptr<FontFamily> family(
+            new FontFamily(std::vector<Font>{ Font(minikinFont, FontStyle()) }));
+
+    const uint32_t kVS1 = 0xFE00;
+    const uint32_t kVS2 = 0xFE01;
+    const uint32_t kVS3 = 0xFE02;
+    const uint32_t kVS17 = 0xE0100;
+    const uint32_t kVS18 = 0xE0101;
+    const uint32_t kVS19 = 0xE0102;
+    const uint32_t kVS20 = 0xE0103;
+
+    const uint32_t kSupportedChar1 = 0x82A6;
+    EXPECT_TRUE(family->getCoverage().get(kSupportedChar1));
+    expectVSGlyphs(family.get(), kSupportedChar1, std::set<uint32_t>({kVS1, kVS17, kVS18, kVS19}));
+
+    const uint32_t kSupportedChar2 = 0x845B;
+    EXPECT_TRUE(family->getCoverage().get(kSupportedChar2));
+    expectVSGlyphs(family.get(), kSupportedChar2, std::set<uint32_t>({kVS2, kVS18, kVS19, kVS20}));
+
+    const uint32_t kNoVsSupportedChar = 0x537F;
+    EXPECT_TRUE(family->getCoverage().get(kNoVsSupportedChar));
+    expectVSGlyphs(family.get(), kNoVsSupportedChar, std::set<uint32_t>());
+
+    const uint32_t kVsOnlySupportedChar = 0x717D;
+    EXPECT_FALSE(family->getCoverage().get(kVsOnlySupportedChar));
+    expectVSGlyphs(family.get(), kVsOnlySupportedChar, std::set<uint32_t>({kVS3, kVS19, kVS20}));
+
+    const uint32_t kNotSupportedChar = 0x845C;
+    EXPECT_FALSE(family->getCoverage().get(kNotSupportedChar));
+    expectVSGlyphs(family.get(), kNotSupportedChar, std::set<uint32_t>());
+}
+
+TEST_F(FontFamilyTest, hasVSTableTest) {
+    // Each entry pairs a test font with the expected result of FontFamily::hasVSTable().
+    struct TestCase {
+        const std::string fontPath;
+        bool hasVSTable;
+    } testCases[] = {
+        { kTestFontDir "Ja.ttf", true },
+        { kTestFontDir "ZhHant.ttf", true },
+        { kTestFontDir "ZhHans.ttf", true },
+        { kTestFontDir "Italic.ttf", false },
+        { kTestFontDir "Bold.ttf", false },
+        { kTestFontDir "BoldItalic.ttf", false },
+    };
+
+    for (const auto& testCase : testCases) {
+        SCOPED_TRACE(testCase.hasVSTable ?
+                "Font " + testCase.fontPath + " should have a variation sequence table." :
+                "Font " + testCase.fontPath + " shouldn't have a variation sequence table.");
+
+        std::shared_ptr<MinikinFont> minikinFont(new MinikinFontForTest(testCase.fontPath));
+        std::shared_ptr<FontFamily> family(new FontFamily(
+                std::vector<Font>{ Font(minikinFont, FontStyle()) }));
+        EXPECT_EQ(testCase.hasVSTable, family->hasVSTable());
+    }
+}
+
+TEST_F(FontFamilyTest, createFamilyWithVariationTest) {
+    // This font has 'wdth' and 'wght' axes.
+    const char kMultiAxisFont[] = kTestFontDir "/MultiAxis.ttf";
+    const char kNoAxisFont[] = kTestFontDir "/Regular.ttf";
+
+    std::shared_ptr<FontFamily> multiAxisFamily = makeFamily(kMultiAxisFont);
+    std::shared_ptr<FontFamily> noAxisFamily = makeFamily(kNoAxisFont);
+
+    {
+        // Do not create a new instance if no variations are specified.
+        EXPECT_EQ(nullptr,
+                multiAxisFamily->createFamilyWithVariation(std::vector<FontVariation>()));
+        EXPECT_EQ(nullptr,
+                noAxisFamily->createFamilyWithVariation(std::vector<FontVariation>()));
+    }
+    {
+        // New instance should be used for supported variation.
+        std::vector<FontVariation> variations = {{MinikinFont::MakeTag('w', 'd', 't', 'h'), 1.0f}};
+        std::shared_ptr<FontFamily> newFamily(
+                multiAxisFamily->createFamilyWithVariation(variations));
+        EXPECT_NE(nullptr, newFamily.get());
+        EXPECT_NE(multiAxisFamily.get(), newFamily.get());
+        EXPECT_EQ(nullptr, noAxisFamily->createFamilyWithVariation(variations));
+    }
+    {
+        // New instance should be used for supported variation. (multiple variations case)
+        std::vector<FontVariation> variations = {
+                { MinikinFont::MakeTag('w', 'd', 't', 'h'), 1.0f },
+                { MinikinFont::MakeTag('w', 'g', 'h', 't'), 1.0f }
+        };
+        std::shared_ptr<FontFamily> newFamily(
+                multiAxisFamily->createFamilyWithVariation(variations));
+        EXPECT_NE(nullptr, newFamily.get());
+        EXPECT_NE(multiAxisFamily.get(), newFamily.get());
+        EXPECT_EQ(nullptr, noAxisFamily->createFamilyWithVariation(variations));
+    }
+    {
+        // Do not create a new instance if none of the variations are supported.
+        std::vector<FontVariation> variations = {
+                { MinikinFont::MakeTag('Z', 'Z', 'Z', 'Z'), 1.0f }
+        };
+        EXPECT_EQ(nullptr, multiAxisFamily->createFamilyWithVariation(variations));
+        EXPECT_EQ(nullptr, noAxisFamily->createFamilyWithVariation(variations));
+    }
+    {
+        // At least one axis is supported, should create new instance.
+        std::vector<FontVariation> variations = {
+                { MinikinFont::MakeTag('w', 'd', 't', 'h'), 1.0f },
+                { MinikinFont::MakeTag('Z', 'Z', 'Z', 'Z'), 1.0f }
+        };
+        std::shared_ptr<FontFamily> newFamily(
+                multiAxisFamily->createFamilyWithVariation(variations));
+        EXPECT_NE(nullptr, newFamily.get());
+        EXPECT_NE(multiAxisFamily.get(), newFamily.get());
+        EXPECT_EQ(nullptr, noAxisFamily->createFamilyWithVariation(variations));
+    }
+}
+
+TEST_F(FontFamilyTest, coverageTableSelectionTest) {
+    // This font supports U+0061. The cmap subtable is format 4 and its platform ID is 0 and
+    // encoding ID is 1.
+    const char kUnicodeEncoding1Font[] = kTestFontDir "UnicodeBMPOnly.ttf";
+
+    // This font supports U+0061. The cmap subtable is format 4 and its platform ID is 0 and
+    // encoding ID is 3.
+    const char kUnicodeEncoding3Font[] = kTestFontDir "UnicodeBMPOnly2.ttf";
+
+    // This font has both a cmap format 4 subtable whose platform ID is 0 and encoding ID is 1
+    // and a cmap format 14 subtable whose platform ID is 0 and encoding ID is 10.
+    // U+0061 is listed in both subtables but U+1F926 is only listed in the latter.
+    const char kUnicodeEncoding4Font[] = kTestFontDir "UnicodeUCS4.ttf";
+
+    std::shared_ptr<FontFamily> unicodeEnc1Font = makeFamily(kUnicodeEncoding1Font);
+    std::shared_ptr<FontFamily> unicodeEnc3Font = makeFamily(kUnicodeEncoding3Font);
+    std::shared_ptr<FontFamily> unicodeEnc4Font = makeFamily(kUnicodeEncoding4Font);
+
+    android::AutoMutex _l(gMinikinLock);
+
+    EXPECT_TRUE(unicodeEnc1Font->hasGlyph(0x0061, 0));
+    EXPECT_TRUE(unicodeEnc3Font->hasGlyph(0x0061, 0));
+    EXPECT_TRUE(unicodeEnc4Font->hasGlyph(0x0061, 0));
+
+    EXPECT_TRUE(unicodeEnc4Font->hasGlyph(0x1F926, 0));
+}
+
+}  // namespace minikin

diff --git a/tests/FontLanguageListCacheTest.cpp b/tests/unittest/FontLanguageListCacheTest.cpp
similarity index 95%
rename from tests/FontLanguageListCacheTest.cpp
rename to tests/unittest/FontLanguageListCacheTest.cpp
index 2a04671..81d84a8 100644
--- a/tests/FontLanguageListCacheTest.cpp
+++ b/tests/unittest/FontLanguageListCacheTest.cpp

@@ -22,7 +22,7 @@
 #include "ICUTestBase.h"
 #include "MinikinInternal.h"
 
-namespace android {
+namespace minikin {
 
 typedef ICUTestBase FontLanguageListCacheTest;
 
@@ -31,7 +31,7 @@
     EXPECT_NE(0UL, FontStyle::registerLanguageList("jp"));
     EXPECT_NE(0UL, FontStyle::registerLanguageList("en,zh-Hans"));
 
-    AutoMutex _l(gMinikinLock);
+    android::AutoMutex _l(gMinikinLock);
     EXPECT_EQ(0UL, FontLanguageListCache::getId(""));
 
     EXPECT_EQ(FontLanguageListCache::getId("en"), FontLanguageListCache::getId("en"));
@@ -50,7 +50,7 @@
 }
 
 TEST_F(FontLanguageListCacheTest, getById) {
-    AutoMutex _l(gMinikinLock);
+    android::AutoMutex _l(gMinikinLock);
     uint32_t enLangId = FontLanguageListCache::getId("en");
     uint32_t jpLangId = FontLanguageListCache::getId("jp");
     FontLanguage english = FontLanguageListCache::getById(enLangId)[0];
@@ -70,4 +70,4 @@
     EXPECT_EQ(japanese, langs2[1]);
 }
 
-}  // android
+}  // namespace minikin

diff --git a/tests/unittest/GraphemeBreakTests.cpp b/tests/unittest/GraphemeBreakTests.cpp
new file mode 100644
index 0000000..6720df6
--- /dev/null
+++ b/tests/unittest/GraphemeBreakTests.cpp

@@ -0,0 +1,317 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <UnicodeUtils.h>
+#include <minikin/GraphemeBreak.h>
+
+namespace minikin {
+
+// Parses |src| with ParseUnicode into a UTF-16 buffer plus an offset, then returns what
+// GraphemeBreak::isGraphemeBreak reports for that offset over the whole parsed buffer.
+// |advances| is passed through unchanged and may be nullptr.
+bool IsBreakWithAdvances(const float* advances, const char* src) {
+    const size_t BUF_SIZE = 256;
+    uint16_t buf[BUF_SIZE];
+    // Out-parameters of ParseUnicode; initialized so they never hold indeterminate values.
+    size_t offset = 0;
+    size_t size = 0;
+    ParseUnicode(buf, BUF_SIZE, src, &size, &offset);
+    return GraphemeBreak::isGraphemeBreak(advances, buf, 0, size, offset);
+}
+
+// Same query as IsBreakWithAdvances, with no advances array supplied.
+bool IsBreak(const char* src) {
+    return IsBreakWithAdvances(nullptr, src);
+}
+
+// Surrogate handling: a well-formed surrogate pair must not be split, while every malformed
+// surrogate combination listed below is expected to report a break.
+TEST(GraphemeBreak, utf16) {
+    EXPECT_FALSE(IsBreak("U+D83C | U+DC31"));  // emoji, U+1F431
+
+    // tests for invalid UTF-16
+    EXPECT_TRUE(IsBreak("U+D800 | U+D800"));  // two leading surrogates
+    EXPECT_TRUE(IsBreak("U+DC00 | U+DC00"));  // two trailing surrogates
+    EXPECT_TRUE(IsBreak("'a' | U+D800"));  // lonely leading surrogate
+    EXPECT_TRUE(IsBreak("U+DC00 | 'a'"));  // lonely trailing surrogate
+    EXPECT_TRUE(IsBreak("U+D800 | 'a'"));  // leading surrogate followed by non-surrogate
+    EXPECT_TRUE(IsBreak("'a' | U+DC00"));  // non-surrogate followed by trailing surrogate
+}
+
+TEST(GraphemeBreak, rules) {
+    // Rule GB1, sot ÷; Rule GB2, ÷ eot
+    EXPECT_TRUE(IsBreak("| 'a'"));
+    EXPECT_TRUE(IsBreak("'a' |"));
+
+    // Rule GB3, CR x LF
+    EXPECT_FALSE(IsBreak("U+000D | U+000A"));  // CR x LF
+
+    // Rule GB4, (Control | CR | LF) ÷
+    EXPECT_TRUE(IsBreak("'a' | U+2028"));  // Line separator
+    EXPECT_TRUE(IsBreak("'a' | U+000D"));  // CR
+    EXPECT_TRUE(IsBreak("'a' | U+000A"));  // LF
+
+    // Rule GB5, ÷ (Control | CR | LF)
+    EXPECT_TRUE(IsBreak("U+2028 | 'a'"));  // Line separator
+    EXPECT_TRUE(IsBreak("U+000D | 'a'"));  // CR
+    EXPECT_TRUE(IsBreak("U+000A | 'a'"));  // LF
+
+    // Rule GB6, L x ( L | V | LV | LVT )
+    EXPECT_FALSE(IsBreak("U+1100 | U+1100"));  // L x L
+    EXPECT_FALSE(IsBreak("U+1100 | U+1161"));  // L x V
+    EXPECT_FALSE(IsBreak("U+1100 | U+AC00"));  // L x LV
+    EXPECT_FALSE(IsBreak("U+1100 | U+AC01"));  // L x LVT
+
+    // Rule GB7, ( LV | V ) x ( V | T )
+    EXPECT_FALSE(IsBreak("U+AC00 | U+1161"));  // LV x V
+    EXPECT_FALSE(IsBreak("U+1161 | U+1161"));  // V x V
+    EXPECT_FALSE(IsBreak("U+AC00 | U+11A8"));  // LV x T
+    EXPECT_FALSE(IsBreak("U+1161 | U+11A8"));  // V x T
+
+    // Rule GB8, ( LVT | T ) x T
+    EXPECT_FALSE(IsBreak("U+AC01 | U+11A8"));  // LVT x T
+    EXPECT_FALSE(IsBreak("U+11A8 | U+11A8"));  // T x T
+
+    // Other hangul pairs not counted above _are_ breaks (GB10)
+    EXPECT_TRUE(IsBreak("U+AC00 | U+1100"));  // LV x L
+    EXPECT_TRUE(IsBreak("U+AC01 | U+1100"));  // LVT x L
+    EXPECT_TRUE(IsBreak("U+11A8 | U+1100"));  // T x L
+    EXPECT_TRUE(IsBreak("U+11A8 | U+AC00"));  // T x LV
+    EXPECT_TRUE(IsBreak("U+11A8 | U+AC01"));  // T x LVT
+
+    // Rule GB12 and Rule GB13, Regional_Indicator x Regional_Indicator
+    EXPECT_FALSE(IsBreak("U+1F1FA | U+1F1F8"));
+    EXPECT_TRUE(IsBreak("U+1F1FA U+1F1F8 | U+1F1FA U+1F1F8")); // Regional indicator pair (flag)
+    EXPECT_FALSE(IsBreak("U+1F1FA | U+1F1F8 U+1F1FA U+1F1F8")); // Regional indicator pair (flag)
+    EXPECT_FALSE(IsBreak("U+1F1FA U+1F1F8 U+1F1FA | U+1F1F8")); // Regional indicator pair (flag)
+
+    EXPECT_TRUE(IsBreak("U+1F1FA U+1F1F8 | U+1F1FA"));  // Regional indicator pair (flag)
+    EXPECT_FALSE(IsBreak("U+1F1FA | U+1F1F8 U+1F1FA"));  // Regional indicator pair (flag)
+    // Same case as the two above, knowing that the first two characters ligate, which is what
+    // would typically happen.
+    const float firstPairLigated[] = {1.0, 0.0, 0.0, 0.0, 1.0, 0.0}; // Two entries per codepoint
+    EXPECT_TRUE(IsBreakWithAdvances(firstPairLigated, "U+1F1FA U+1F1F8 | U+1F1FA"));
+    EXPECT_FALSE(IsBreakWithAdvances(firstPairLigated, "U+1F1FA | U+1F1F8 U+1F1FA"));
+    // Repeat the tests, But now the font doesn't have a ligature for the first two characters,
+    // while it does have a ligature for the last two. This could happen for fonts that do not
+    // support some (potentially encoded later than they were developed) flags.
+    const float secondPairLigated[] = {1.0, 0.0, 1.0, 0.0, 0.0, 0.0};
+    EXPECT_FALSE(IsBreakWithAdvances(secondPairLigated, "U+1F1FA U+1F1F8 | U+1F1FA"));
+    EXPECT_TRUE(IsBreakWithAdvances(secondPairLigated, "U+1F1FA | U+1F1F8 U+1F1FA"));
+
+    EXPECT_TRUE(IsBreak("'a' U+1F1FA U+1F1F8 | U+1F1FA"));  // Regional indicator pair (flag)
+    EXPECT_FALSE(IsBreak("'a' U+1F1FA | U+1F1F8 U+1F1FA"));  // Regional indicator pair (flag)
+
+    EXPECT_TRUE(
+            IsBreak("'a' U+1F1FA U+1F1F8 | U+1F1FA U+1F1F8"));  // Regional indicator pair (flag)
+    EXPECT_FALSE(
+            IsBreak("'a' U+1F1FA | U+1F1F8 U+1F1FA U+1F1F8"));  // Regional indicator pair (flag)
+    EXPECT_FALSE(
+            IsBreak("'a' U+1F1FA U+1F1F8 U+1F1FA | U+1F1F8"));  // Regional indicator pair (flag)
+
+    // Rule GB9, x (Extend | ZWJ)
+    EXPECT_FALSE(IsBreak("'a' | U+0301"));  // combining accent
+    EXPECT_FALSE(IsBreak("'a' | U+200D"));  // ZWJ
+    // Rule GB9a, x SpacingMark
+    EXPECT_FALSE(IsBreak("U+0915 | U+093E"));  // KA, AA (spacing mark)
+    // Rule GB9b, Prepend x
+    // see tailoring test for prepend, as current ICU doesn't have any characters in the class
+
+    // Rule GB999, Any ÷ Any
+    EXPECT_TRUE(IsBreak("'a' | 'b'"));
+    EXPECT_TRUE(IsBreak("'f' | 'i'"));  // probable ligature
+    EXPECT_TRUE(IsBreak("U+0644 | U+0627"));  // probable ligature, lam + alef
+    EXPECT_TRUE(IsBreak("U+4E00 | U+4E00"));  // CJK ideographs
+    EXPECT_TRUE(IsBreak("'a' | U+1F1FA U+1F1F8"));  // Regional indicator pair (flag)
+    EXPECT_TRUE(IsBreak("U+1F1FA U+1F1F8 | 'a'"));  // Regional indicator pair (flag)
+
+    // Extended rule for emoji tag sequence.
+    EXPECT_TRUE(IsBreak("'a' | U+1F3F4 'a'"));
+    EXPECT_TRUE(IsBreak("'a' U+1F3F4 | 'a'"));
+
+    // Immediate tag_term after tag_base.
+    EXPECT_TRUE(IsBreak("'a' | U+1F3F4 U+E007F 'a'"));
+    EXPECT_FALSE(IsBreak("U+1F3F4 | U+E007F"));
+    EXPECT_TRUE(IsBreak("'a' U+1F3F4 U+E007F | 'a'"));
+
+    // Flag sequence
+    // U+1F3F4 U+E0067 U+E0062 U+E0073 U+E0063 U+E0074 U+E007F is emoji tag sequence for the flag
+    // of Scotland.
+    // U+1F3F4 is WAVING BLACK FLAG. This can be a tag_base character.
+    // U+E0067 is TAG LATIN SMALL LETTER G. This can be a part of tag_spec.
+    // U+E0062 is TAG LATIN SMALL LETTER B. This can be a part of tag_spec.
+    // U+E0073 is TAG LATIN SMALL LETTER S. This can be a part of tag_spec.
+    // U+E0063 is TAG LATIN SMALL LETTER C. This can be a part of tag_spec.
+    // U+E0074 is TAG LATIN SMALL LETTER T. This can be a part of tag_spec.
+    // U+E007F is CANCEL TAG. This is a tag_term character.
+    EXPECT_TRUE(IsBreak("'a' | U+1F3F4 U+E0067 U+E0062 U+E0073 U+E0063 U+E0074 U+E007F"));
+    EXPECT_FALSE(IsBreak("U+1F3F4 | U+E0067 U+E0062 U+E0073 U+E0063 U+E0074 U+E007F"));
+    EXPECT_FALSE(IsBreak("U+1F3F4 U+E0067 | U+E0062 U+E0073 U+E0063 U+E0074 U+E007F"));
+    EXPECT_FALSE(IsBreak("U+1F3F4 U+E0067 U+E0062 | U+E0073 U+E0063 U+E0074 U+E007F"));
+    EXPECT_FALSE(IsBreak("U+1F3F4 U+E0067 U+E0062 U+E0073 | U+E0063 U+E0074 U+E007F"));
+    EXPECT_FALSE(IsBreak("U+1F3F4 U+E0067 U+E0062 U+E0073 U+E0063 | U+E0074 U+E007F"));
+    EXPECT_FALSE(IsBreak("U+1F3F4 U+E0067 U+E0062 U+E0073 U+E0063 U+E0074 | U+E007F"));
+    EXPECT_TRUE(IsBreak("U+1F3F4 U+E0067 U+E0062 U+E0073 U+E0063 U+E0074 U+E007F | 'a'"));
+}
+
+TEST(GraphemeBreak, tailoring) {
+    // control characters that we interpret as "extend"
+    EXPECT_FALSE(IsBreak("'a' | U+00AD"));  // soft hyphen
+    EXPECT_FALSE(IsBreak("'a' | U+200B"));  // zwsp
+    EXPECT_FALSE(IsBreak("'a' | U+200E"));  // lrm
+    EXPECT_FALSE(IsBreak("'a' | U+202A"));  // lre
+    EXPECT_FALSE(IsBreak("'a' | U+E0041"));  // tag character
+
+    // UTC-approved characters for the Prepend class
+    EXPECT_FALSE(IsBreak("U+06DD | U+0661"));  // arabic subtending mark + digit one
+
+    EXPECT_TRUE(IsBreak("U+0E01 | U+0E33"));  // Thai sara am
+
+    // virama is not a grapheme break, but "pure killer" is
+    EXPECT_FALSE(IsBreak("U+0915 | U+094D U+0915"));  // Devanagari ka+virama+ka
+    EXPECT_FALSE(IsBreak("U+0915 U+094D | U+0915"));  // Devanagari ka+virama+ka
+    EXPECT_FALSE(IsBreak("U+0E01 | U+0E3A U+0E01"));  // thai phinthu = pure killer
+    EXPECT_TRUE(IsBreak("U+0E01 U+0E3A | U+0E01"));  // thai phinthu = pure killer
+
+    // Repetition of above tests, but with a given advances array that implies everything
+    // became just one cluster.
+    const float conjoined[] = {1.0, 0.0, 0.0};
+    EXPECT_FALSE(IsBreakWithAdvances(conjoined,
+            "U+0915 | U+094D U+0915"));  // Devanagari ka+virama+ka
+    EXPECT_FALSE(IsBreakWithAdvances(conjoined,
+            "U+0915 U+094D | U+0915"));  // Devanagari ka+virama+ka
+    EXPECT_FALSE(IsBreakWithAdvances(conjoined,
+            "U+0E01 | U+0E3A U+0E01"));  // thai phinthu = pure killer
+    EXPECT_TRUE(IsBreakWithAdvances(conjoined,
+            "U+0E01 U+0E3A | U+0E01"));  // thai phinthu = pure killer
+
+    // Repetition of above tests, but with an advances array implying that the virama did not
+    // form a cluster with the following consonant. The difference is that there is now
+    // a grapheme break after the virama in ka+virama+ka.
+    const float separate[] = {1.0, 0.0, 1.0};
+    EXPECT_FALSE(IsBreakWithAdvances(separate,
+            "U+0915 | U+094D U+0915"));  // Devanagari ka+virama+ka
+    EXPECT_TRUE(IsBreakWithAdvances(separate,
+            "U+0915 U+094D | U+0915"));  // Devanagari ka+virama+ka
+    EXPECT_FALSE(IsBreakWithAdvances(separate,
+            "U+0E01 | U+0E3A U+0E01"));  // thai phinthu = pure killer
+    EXPECT_TRUE(IsBreakWithAdvances(separate,
+            "U+0E01 U+0E3A | U+0E01"));  // thai phinthu = pure killer
+
+    // suppress grapheme breaks in zwj emoji sequences
+    EXPECT_FALSE(IsBreak("U+1F469 U+200D | U+2764 U+FE0F U+200D U+1F48B U+200D U+1F468"));
+    EXPECT_FALSE(IsBreak("U+1F469 U+200D U+2764 U+FE0F U+200D | U+1F48B U+200D U+1F468"));
+    EXPECT_FALSE(IsBreak("U+1F469 U+200D U+2764 U+FE0F U+200D U+1F48B U+200D | U+1F468"));
+    EXPECT_FALSE(IsBreak("U+1F468 U+200D | U+1F469 U+200D U+1F466"));
+    EXPECT_FALSE(IsBreak("U+1F468 U+200D U+1F469 U+200D | U+1F466"));
+    EXPECT_FALSE(IsBreak("U+1F469 U+200D | U+1F469 U+200D U+1F467 U+200D U+1F466"));
+    EXPECT_FALSE(IsBreak("U+1F469 U+200D U+1F469 U+200D | U+1F467 U+200D U+1F466"));
+    EXPECT_FALSE(IsBreak("U+1F469 U+200D U+1F469 U+200D U+1F467 U+200D | U+1F466"));
+    EXPECT_FALSE(IsBreak("U+1F441 U+200D | U+1F5E8"));
+
+    // Do not break before and after zwj with all kind of emoji characters.
+    EXPECT_FALSE(IsBreak("U+1F431 | U+200D U+1F464"));
+    EXPECT_FALSE(IsBreak("U+1F431 U+200D | U+1F464"));
+
+    // ARABIC LETTER BEH + ZWJ + heart, not a zwj emoji sequence, so we preserve the break
+    EXPECT_TRUE(IsBreak("U+0628 U+200D | U+2764"));
+}
+
+TEST(GraphemeBreak, emojiModifiers) {
+    EXPECT_FALSE(IsBreak("U+261D | U+1F3FB"));  // white up pointing index + modifier
+    EXPECT_FALSE(IsBreak("U+270C | U+1F3FB"));  // victory hand + modifier
+    EXPECT_FALSE(IsBreak("U+1F466 | U+1F3FB"));  // boy + modifier
+    EXPECT_FALSE(IsBreak("U+1F466 | U+1F3FC"));  // boy + modifier
+    EXPECT_FALSE(IsBreak("U+1F466 | U+1F3FD"));  // boy + modifier
+    EXPECT_FALSE(IsBreak("U+1F466 | U+1F3FE"));  // boy + modifier
+    EXPECT_FALSE(IsBreak("U+1F466 | U+1F3FF"));  // boy + modifier
+    EXPECT_FALSE(IsBreak("U+1F918 | U+1F3FF"));  // sign of the horns + modifier
+    EXPECT_FALSE(IsBreak("U+1F933 | U+1F3FF"));  // selfie (Unicode 9) + modifier
+    // Repetition of the tests above, with the knowledge that they are ligated.
+    const float ligated1_2[] = {1.0, 0.0, 0.0};
+    const float ligated2_2[] = {1.0, 0.0, 0.0, 0.0};
+    EXPECT_FALSE(IsBreakWithAdvances(ligated1_2, "U+261D | U+1F3FB"));
+    EXPECT_FALSE(IsBreakWithAdvances(ligated1_2, "U+270C | U+1F3FB"));
+    EXPECT_FALSE(IsBreakWithAdvances(ligated2_2, "U+1F466 | U+1F3FB"));
+    EXPECT_FALSE(IsBreakWithAdvances(ligated2_2, "U+1F466 | U+1F3FC"));
+    EXPECT_FALSE(IsBreakWithAdvances(ligated2_2, "U+1F466 | U+1F3FD"));
+    EXPECT_FALSE(IsBreakWithAdvances(ligated2_2, "U+1F466 | U+1F3FE"));
+    EXPECT_FALSE(IsBreakWithAdvances(ligated2_2, "U+1F466 | U+1F3FF"));
+    EXPECT_FALSE(IsBreakWithAdvances(ligated2_2, "U+1F918 | U+1F3FF"));
+    EXPECT_FALSE(IsBreakWithAdvances(ligated2_2, "U+1F933 | U+1F3FF"));
+    // Repetition of the tests above, with the knowledge that they are not ligated.
+    const float unligated1_2[] = {1.0, 1.0, 0.0};
+    const float unligated2_2[] = {1.0, 0.0, 1.0, 0.0};
+    EXPECT_TRUE(IsBreakWithAdvances(unligated1_2, "U+261D | U+1F3FB"));
+    EXPECT_TRUE(IsBreakWithAdvances(unligated1_2, "U+270C | U+1F3FB"));
+    EXPECT_TRUE(IsBreakWithAdvances(unligated2_2, "U+1F466 | U+1F3FB"));
+    EXPECT_TRUE(IsBreakWithAdvances(unligated2_2, "U+1F466 | U+1F3FC"));
+    EXPECT_TRUE(IsBreakWithAdvances(unligated2_2, "U+1F466 | U+1F3FD"));
+    EXPECT_TRUE(IsBreakWithAdvances(unligated2_2, "U+1F466 | U+1F3FE"));
+    EXPECT_TRUE(IsBreakWithAdvances(unligated2_2, "U+1F466 | U+1F3FF"));
+    EXPECT_TRUE(IsBreakWithAdvances(unligated2_2, "U+1F918 | U+1F3FF"));
+    EXPECT_TRUE(IsBreakWithAdvances(unligated2_2, "U+1F933 | U+1F3FF"));
+
+    // adding extend characters between emoji base and modifier doesn't affect grapheme cluster
+    EXPECT_FALSE(IsBreak("U+270C U+FE0E | U+1F3FB"));  // victory hand + text style + modifier
+    EXPECT_FALSE(IsBreak("U+270C U+FE0F | U+1F3FB"));  // victory hand + emoji style + modifier
+    // Repetition of the two tests above, with the knowledge that they are ligated.
+    const float ligated1_1_2[] = {1.0, 0.0, 0.0, 0.0};
+    EXPECT_FALSE(IsBreakWithAdvances(ligated1_1_2, "U+270C U+FE0E | U+1F3FB"));
+    EXPECT_FALSE(IsBreakWithAdvances(ligated1_1_2, "U+270C U+FE0F | U+1F3FB"));
+    // Repetition of the first two tests, with the knowledge that they are not ligated.
+    const float unligated1_1_2[] = {1.0, 0.0, 1.0, 0.0};
+    EXPECT_TRUE(IsBreakWithAdvances(unligated1_1_2, "U+270C U+FE0E | U+1F3FB"));
+    EXPECT_TRUE(IsBreakWithAdvances(unligated1_1_2, "U+270C U+FE0F | U+1F3FB"));
+
+    // heart is not an emoji base
+    EXPECT_TRUE(IsBreak("U+2764 | U+1F3FB"));  // heart + modifier
+    EXPECT_TRUE(IsBreak("U+2764 U+FE0E | U+1F3FB"));  // heart + text style + modifier
+    EXPECT_TRUE(IsBreak("U+2764 U+FE0F | U+1F3FB"));  // heart + emoji style + modifier
+    EXPECT_TRUE(IsBreak("U+1F3FB | U+1F3FB"));  // modifier + modifier
+
+    // rat is not an emoji modifier
+    EXPECT_TRUE(IsBreak("U+1F466 | U+1F400"));  // boy + rat
+}
+
+TEST(GraphemeBreak, genderBalancedEmoji) {
+    // U+1F469 is WOMAN, U+200D is ZWJ, U+1F4BC is BRIEFCASE.
+    EXPECT_FALSE(IsBreak("U+1F469 | U+200D U+1F4BC"));
+    EXPECT_FALSE(IsBreak("U+1F469 U+200D | U+1F4BC"));
+    // The above two cases, when the ligature is not supported in the font. We now expect a break
+    // between them.
+    const float unligated2_1_2[] = {1.0, 0.0, 0.0, 1.0, 0.0};
+    EXPECT_FALSE(IsBreakWithAdvances(unligated2_1_2, "U+1F469 | U+200D U+1F4BC"));
+    EXPECT_TRUE(IsBreakWithAdvances(unligated2_1_2, "U+1F469 U+200D | U+1F4BC"));
+
+    // U+2695 now has the emoji property, so it should be part of a ZWJ sequence.
+    EXPECT_FALSE(IsBreak("U+1F469 | U+200D U+2695"));
+    EXPECT_FALSE(IsBreak("U+1F469 U+200D | U+2695"));
+    // The above two cases, when the ligature is not supported in the font. We now expect a break
+    // between them.
+    const float unligated2_1_1[] = {1.0, 0.0, 0.0, 1.0};
+    EXPECT_FALSE(IsBreakWithAdvances(unligated2_1_1, "U+1F469 | U+200D U+2695"));
+    EXPECT_TRUE(IsBreakWithAdvances(unligated2_1_1, "U+1F469 U+200D | U+2695"));
+}
+
+// Queries a sub-range of a longer buffer instead of the whole buffer.
+// NOTE(review): the third and fourth arguments (2, 3) appear to be the start index and length of
+// the range to consider, mirroring the (0, size) pair used by the helpers at the top of this
+// file -- confirm against GraphemeBreak.h.
+TEST(GraphemeBreak, offsets) {
+    uint16_t string[] = { 0x0041, 0x06DD, 0x0045, 0x0301, 0x0049, 0x0301 };
+    EXPECT_TRUE(GraphemeBreak::isGraphemeBreak(nullptr, string, 2, 3, 2));
+    EXPECT_FALSE(GraphemeBreak::isGraphemeBreak(nullptr, string, 2, 3, 3));
+    EXPECT_TRUE(GraphemeBreak::isGraphemeBreak(nullptr, string, 2, 3, 4));
+    EXPECT_TRUE(GraphemeBreak::isGraphemeBreak(nullptr, string, 2, 3, 5));
+}
+
+}  // namespace minikin

diff --git a/tests/unittest/HbFontCacheTest.cpp b/tests/unittest/HbFontCacheTest.cpp
new file mode 100644
index 0000000..a5581a2
--- /dev/null
+++ b/tests/unittest/HbFontCacheTest.cpp

@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "HbFontCache.h"
+
+#include <android/log.h>
+#include <gtest/gtest.h>
+#include <utils/Mutex.h>
+
+#include <memory>
+
+#include <hb.h>
+
+#include "MinikinInternal.h"
+#include "MinikinFontForTest.h"
+#include <minikin/MinikinFont.h>
+
+namespace minikin {
+
+// Fixture that purges the HarfBuzz font cache after every test.
+class HbFontCacheTest : public testing::Test {
+public:
+    // `override` makes the compiler verify that this really replaces the fixture hook.
+    void TearDown() override {
+        // The purge is done while holding gMinikinLock.
+        android::AutoMutex _l(gMinikinLock);
+        purgeHbFontCacheLocked();
+    }
+};
+
+// Checks the lookup contract of getHbFontLocked(): it never yields null, it is stable for one
+// font object, and it is distinct across fonts with different IDs.
+TEST_F(HbFontCacheTest, getHbFontLockedTest) {
+    const auto fontA = std::make_shared<MinikinFontForTest>(kTestFontDir "Regular.ttf");
+    const auto fontB = std::make_shared<MinikinFontForTest>(kTestFontDir "Bold.ttf");
+    const auto fontC = std::make_shared<MinikinFontForTest>(kTestFontDir "BoldItalic.ttf");
+    MinikinFontForTest* const fonts[] = {fontA.get(), fontB.get(), fontC.get()};
+
+    android::AutoMutex _l(gMinikinLock);
+
+    // A lookup never yields null, not even for a null font.
+    for (MinikinFontForTest* font : fonts) {
+        EXPECT_NE(nullptr, getHbFontLocked(font));
+    }
+    EXPECT_NE(nullptr, getHbFontLocked(nullptr));
+
+    // Looking up the same font object twice must give the same result.
+    for (MinikinFontForTest* font : fonts) {
+        EXPECT_EQ(getHbFontLocked(font), getHbFontLocked(font));
+    }
+
+    // Fonts with different IDs must map to different objects.
+    EXPECT_NE(getHbFontLocked(fonts[0]), getHbFontLocked(fonts[1]));
+    EXPECT_NE(getHbFontLocked(fonts[0]), getHbFontLocked(fonts[2]));
+}
+
+// Verifies that purging the cache discards the previously cached font object.
+TEST_F(HbFontCacheTest, purgeCacheTest) {
+    const auto minikinFont = std::make_shared<MinikinFontForTest>(kTestFontDir "Regular.ttf");
+
+    android::AutoMutex _l(gMinikinLock);
+    hb_font_t* cachedFont = getHbFontLocked(minikinFont.get());
+    ASSERT_NE(nullptr, cachedFont);
+
+    // Tag the cached object with user data so it can be recognized later.
+    hb_user_data_key_t tagKey;
+    void* const tagValue = reinterpret_cast<void*>(0xdeadbeef);
+    hb_font_set_user_data(cachedFont, &tagKey, tagValue, nullptr, false);
+    ASSERT_EQ(tagValue, hb_font_get_user_data(cachedFont, &tagKey));
+
+    purgeHbFontCacheLocked();
+
+    // The object handed out after the purge must not carry the tag. Pointer values are not
+    // compared because the allocator may reuse the same address for the new object.
+    cachedFont = getHbFontLocked(minikinFont.get());
+    EXPECT_EQ(nullptr, hb_font_get_user_data(cachedFont, &tagKey));
+}
+
+}  // namespace minikin

diff --git a/tests/unittest/HyphenatorTest.cpp b/tests/unittest/HyphenatorTest.cpp
new file mode 100644
index 0000000..ecd58a2
--- /dev/null
+++ b/tests/unittest/HyphenatorTest.cpp

@@ -0,0 +1,334 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "ICUTestBase.h"
+#include <minikin/Hyphenator.h>
+#include <FileUtils.h>
+
+#ifndef NELEM
+#define NELEM(x) ((sizeof(x) / sizeof((x)[0])))
+#endif
+
+namespace minikin {
+
+// Paths of the hyphenation pattern files read by the automatic-hyphenation tests.
+const char* usHyph = "/system/usr/hyphen-data/hyph-en-us.hyb";
+const char* malayalamHyph = "/system/usr/hyphen-data/hyph-ml.hyb";
+
+// The Hyphenator tests use ICUTestBase as their fixture.
+typedef ICUTestBase HyphenatorTest;
+
+// Locales passed to Hyphenator::hyphenate() by the tests.
+const icu::Locale catalanLocale("ca", "ES", nullptr, nullptr);
+const icu::Locale polishLocale("pl", "PL", nullptr, nullptr);
+const icu::Locale& usLocale = icu::Locale::getUS();
+
+// UTF-16 code units used to build the test words.
+const uint16_t HYPHEN_MINUS = 0x002D;
+const uint16_t SOFT_HYPHEN = 0x00AD;
+const uint16_t MIDDLE_DOT = 0x00B7;
+const uint16_t GREEK_LOWER_ALPHA = 0x03B1;
+const uint16_t ARMENIAN_AYB = 0x0531;
+const uint16_t HEBREW_ALEF = 0x05D0;
+const uint16_t ARABIC_ALEF = 0x0627;
+const uint16_t ARABIC_BEH = 0x0628;
+const uint16_t ARABIC_ZWARAKAY = 0x0659;
+const uint16_t MALAYALAM_KA = 0x0D15;
+const uint16_t UCAS_E = 0x1401;
+const uint16_t HYPHEN = 0x2010;
+const uint16_t EN_DASH = 0x2013;
+
+// Simple test for US English. This tests "table", which happens to be in the exceptions list.
+TEST_F(HyphenatorTest, usEnglishAutomaticHyphenation) {
+    // loadBinary() receives a raw pointer into the pattern data, so the buffer is kept in a
+    // named local that outlives every use of the hyphenator instead of a temporary.
+    const auto patternData = readWholeFile(usHyph);
+    Hyphenator* hyphenator = Hyphenator::loadBinary(patternData.data(), 2, 3);
+    const uint16_t word[] = {'t', 'a', 'b', 'l', 'e'};
+    std::vector<HyphenationType> result;
+    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
+    // Fatal on a size mismatch: the checks below index into |result|.
+    ASSERT_EQ(NELEM(word), result.size());
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
+    EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN, result[2]);
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[3]);
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[4]);
+}
+
+// Catalan l·l should break as l-/l
+TEST_F(HyphenatorTest, catalanMiddleDot) {
+    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
+    const uint16_t word[] = {'l', 'l', MIDDLE_DOT, 'l', 'l'};
+    std::vector<HyphenationType> result;
+    hyphenator->hyphenate(&result, word, NELEM(word), catalanLocale);
+    // Fatal on a size mismatch: the checks below index into |result|.
+    ASSERT_EQ(NELEM(word), result.size());
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[2]);
+    EXPECT_EQ(HyphenationType::BREAK_AND_REPLACE_WITH_HYPHEN, result[3]);
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[4]);
+}
+
+// Catalan l·l should not break if the word is too short.
+TEST_F(HyphenatorTest, catalanMiddleDotShortWord) {
+    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
+    const uint16_t word[] = {'l', MIDDLE_DOT, 'l'};
+    std::vector<HyphenationType> result;
+    hyphenator->hyphenate(&result, word, NELEM(word), catalanLocale);
+    // Fatal on a size mismatch: the checks below index into |result|.
+    ASSERT_EQ(NELEM(word), result.size());
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[2]);
+}
+
+// If we break on a hyphen in Polish, the hyphen should be repeated on the next line.
+TEST_F(HyphenatorTest, polishHyphen) {
+    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
+    const uint16_t word[] = {'x', HYPHEN, 'y'};
+    std::vector<HyphenationType> result;
+    hyphenator->hyphenate(&result, word, NELEM(word), polishLocale);
+    // Fatal on a size mismatch: the checks below index into |result|.
+    ASSERT_EQ(NELEM(word), result.size());
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
+    EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN_AT_NEXT_LINE, result[2]);
+}
+
+// If the language is Polish but the script is not Latin, don't use Polish rules for hyphenation.
+TEST_F(HyphenatorTest, polishHyphenButNonLatinWord) {
+    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
+    const uint16_t word[] = {GREEK_LOWER_ALPHA, HYPHEN, GREEK_LOWER_ALPHA};
+    std::vector<HyphenationType> result;
+    hyphenator->hyphenate(&result, word, NELEM(word), polishLocale);
+    // Fatal on a size mismatch: the checks below index into |result|.
+    ASSERT_EQ(NELEM(word), result.size());
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
+    EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[2]);
+}
+
+// Polish en dash doesn't repeat on next line (as far as we know), but just provides a break
+// opportunity.
+TEST_F(HyphenatorTest, polishEnDash) {
+    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
+    const uint16_t word[] = {'x', EN_DASH, 'y'};
+    std::vector<HyphenationType> result;
+    hyphenator->hyphenate(&result, word, NELEM(word), polishLocale);
+    // Fatal on a size mismatch: the checks below index into |result|.
+    ASSERT_EQ(NELEM(word), result.size());
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
+    EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[2]);
+}
+
+// In Latin script text, soft hyphens should insert a visible hyphen if broken at.
+TEST_F(HyphenatorTest, latinSoftHyphen) {
+    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
+    const uint16_t word[] = {'x', SOFT_HYPHEN, 'y'};
+    std::vector<HyphenationType> result;
+    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
+    // Fatal on a size mismatch: the checks below index into |result|.
+    ASSERT_EQ(NELEM(word), result.size());
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
+    EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN, result[2]);
+}
+
+// Soft hyphens at the beginning of a word are not useful in linebreaking.
+TEST_F(HyphenatorTest, latinSoftHyphenStartingTheWord) {
+    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
+    const uint16_t word[] = {SOFT_HYPHEN, 'y'};
+    std::vector<HyphenationType> result;
+    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
+    // Fatal on a size mismatch: the checks below index into |result|.
+    ASSERT_EQ(NELEM(word), result.size());
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
+}
+
+// In Malayalam script text, soft hyphens should not insert a visible hyphen if broken at.
+TEST_F(HyphenatorTest, malayalamSoftHyphen) {
+    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
+    const uint16_t word[] = {MALAYALAM_KA, SOFT_HYPHEN, MALAYALAM_KA};
+    std::vector<HyphenationType> result;
+    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
+    // Fatal on a size mismatch: the checks below index into |result|.
+    ASSERT_EQ(NELEM(word), result.size());
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
+    EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[2]);
+}
+
+// In automatically hyphenated Malayalam script text, we should not insert a visible hyphen.
+TEST_F(HyphenatorTest, malayalamAutomaticHyphenation) {
+    // loadBinary() receives a raw pointer into the pattern data, so the buffer is kept in a
+    // named local that outlives every use of the hyphenator instead of a temporary.
+    const auto patternData = readWholeFile(malayalamHyph);
+    Hyphenator* hyphenator = Hyphenator::loadBinary(patternData.data(), 2, 2);
+    const uint16_t word[] = {
+            MALAYALAM_KA, MALAYALAM_KA, MALAYALAM_KA, MALAYALAM_KA, MALAYALAM_KA};
+    std::vector<HyphenationType> result;
+    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
+    // Fatal on a size mismatch: the checks below index into |result|.
+    ASSERT_EQ(NELEM(word), result.size());
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
+    EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[2]);
+    EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[3]);
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[4]);
+}
+
+// In Armenian script text, soft hyphens should insert an Armenian hyphen if broken at.
+TEST_F(HyphenatorTest, aremenianSoftHyphen) {
+    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
+    const uint16_t word[] = {ARMENIAN_AYB, SOFT_HYPHEN, ARMENIAN_AYB};
+    std::vector<HyphenationType> result;
+    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
+    // Fatal on a size mismatch: the checks below index into |result|.
+    ASSERT_EQ(NELEM(word), result.size());
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
+    EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_ARMENIAN_HYPHEN, result[2]);
+}
+
+// In Hebrew script text, soft hyphens should insert a normal hyphen if broken at, for now.
+// We may need to change this to maqaf later.
+TEST_F(HyphenatorTest, hebrewSoftHyphen) {
+    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
+    const uint16_t word[] = {HEBREW_ALEF, SOFT_HYPHEN, HEBREW_ALEF};
+    std::vector<HyphenationType> result;
+    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
+    // Fatal on a size mismatch: the checks below index into |result|.
+    ASSERT_EQ(NELEM(word), result.size());
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
+    EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN, result[2]);
+}
+
+// Soft hyphen between two Arabic letters that join should keep the joining
+// behavior when broken across lines.
+TEST_F(HyphenatorTest, arabicSoftHyphenConnecting) {
+    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
+    const uint16_t word[] = {ARABIC_BEH, SOFT_HYPHEN, ARABIC_BEH};
+    std::vector<HyphenationType> result;
+    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
+    // Fatal on a size mismatch: the checks below index into |result|.
+    ASSERT_EQ(NELEM(word), result.size());
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
+    EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN_AND_ZWJ, result[2]);
+}
+
+// Arabic letters may be joining on one side, but if it's the wrong side, we
+// should use the normal hyphen.
+TEST_F(HyphenatorTest, arabicSoftHyphenNonConnecting) {
+    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
+    const uint16_t word[] = {ARABIC_ALEF, SOFT_HYPHEN, ARABIC_BEH};
+    std::vector<HyphenationType> result;
+    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
+    // Fatal on a size mismatch: the checks below index into |result|.
+    ASSERT_EQ(NELEM(word), result.size());
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
+    EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN, result[2]);
+}
+
+// Skip transparent characters until you find a non-transparent one.
+TEST_F(HyphenatorTest, arabicSoftHyphenSkipTransparents) {
+    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
+    const uint16_t word[] = {ARABIC_BEH, ARABIC_ZWARAKAY, SOFT_HYPHEN, ARABIC_ZWARAKAY, ARABIC_BEH};
+    std::vector<HyphenationType> result;
+    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
+    // Fatal on a size mismatch: the checks below index into |result|.
+    ASSERT_EQ(NELEM(word), result.size());
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[2]);
+    EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN_AND_ZWJ, result[3]);
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[4]);
+}
+
+// Skip transparent characters until you find a non-transparent one. If we get to one end without
+// finding anything, we are still non-joining.
+TEST_F(HyphenatorTest, arabicSoftHyphenTransparentsAtEnd) {
+    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
+    const uint16_t word[] = {ARABIC_BEH, ARABIC_ZWARAKAY, SOFT_HYPHEN, ARABIC_ZWARAKAY};
+    std::vector<HyphenationType> result;
+    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
+    // Fatal check: the element checks below index into result, so stop here on a size mismatch.
+    ASSERT_EQ(NELEM(word), result.size());
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[2]);
+    EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN, result[3]);
+}
+
+// Skip transparent characters until you find a non-transparent one. If we get to one end without
+// finding anything, we are still non-joining.
+TEST_F(HyphenatorTest, arabicSoftHyphenTransparentsAtStart) {
+    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
+    const uint16_t word[] = {ARABIC_ZWARAKAY, SOFT_HYPHEN, ARABIC_ZWARAKAY, ARABIC_BEH};
+    std::vector<HyphenationType> result;
+    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
+    // Fatal check: the element checks below index into result, so stop here on a size mismatch.
+    ASSERT_EQ(NELEM(word), result.size());
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
+    EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN, result[2]);
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[3]);
+}
+
+// In Unified Canadian Aboriginal script (UCAS) text, soft hyphens should insert a UCAS hyphen.
+TEST_F(HyphenatorTest, ucasSoftHyphen) {
+    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
+    const uint16_t word[] = {UCAS_E, SOFT_HYPHEN, UCAS_E};
+    std::vector<HyphenationType> result;
+    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
+    // Fatal check: the element checks below index into result, so stop here on a size mismatch.
+    ASSERT_EQ(NELEM(word), result.size());
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
+    EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_UCAS_HYPHEN, result[2]);
+}
+
+// Presently, soft hyphen looks at the character after it to determine hyphenation type. This is a
+// little arbitrary, but let's test it anyway.
+TEST_F(HyphenatorTest, mixedScriptSoftHyphen) {
+    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
+    const uint16_t word[] = {'a', SOFT_HYPHEN, UCAS_E};
+    std::vector<HyphenationType> result;
+    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
+    // Fatal check: the element checks below index into result, so stop here on a size mismatch.
+    ASSERT_EQ(NELEM(word), result.size());
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
+    EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_UCAS_HYPHEN, result[2]);
+}
+
+// Hard hyphens provide a breaking opportunity with nothing extra inserted.
+TEST_F(HyphenatorTest, hardHyphen) {
+    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
+    const uint16_t word[] = {'x', HYPHEN, 'y'};
+    std::vector<HyphenationType> result;
+    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
+    // Fatal check: the element checks below index into result, so stop here on a size mismatch.
+    ASSERT_EQ(NELEM(word), result.size());
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
+    EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[2]);
+}
+
+// Hyphen-minuses also provide a breaking opportunity with nothing extra inserted.
+TEST_F(HyphenatorTest, hyphenMinus) {
+    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
+    const uint16_t word[] = {'x', HYPHEN_MINUS, 'y'};
+    std::vector<HyphenationType> result;
+    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
+    // Fatal check: the element checks below index into result, so stop here on a size mismatch.
+    ASSERT_EQ(NELEM(word), result.size());
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
+    EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[2]);
+}
+
+// If the word starts with a hard hyphen or hyphen-minus, it doesn't make sense to break
+// it at that point.
+TEST_F(HyphenatorTest, startingHyphenMinus) {
+    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
+    const uint16_t word[] = {HYPHEN_MINUS, 'y'};
+    std::vector<HyphenationType> result;
+    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
+    // Fatal check: the element checks below index into result, so stop here on a size mismatch.
+    ASSERT_EQ(NELEM(word), result.size());
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
+    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
+}
+
+}  // namespace minikin
+

diff --git a/tests/ICUTestBase.h b/tests/unittest/ICUTestBase.h
similarity index 96%
rename from tests/ICUTestBase.h
rename to tests/unittest/ICUTestBase.h
index 3bcfaf3..f915cf8 100644
--- a/tests/ICUTestBase.h
+++ b/tests/unittest/ICUTestBase.h

@@ -26,6 +26,8 @@
 #include <sys/stat.h>
 #include <sys/mman.h>
 
+namespace minikin {
+
 class ICUTestBase : public testing::Test {
 protected:
     virtual void SetUp() override {
@@ -48,5 +50,5 @@
     }
 };
 
-
+}  // namespace minikin
 #endif  //  MINIKIN_TEST_ICU_TEST_BASE_H

diff --git a/tests/unittest/LayoutTest.cpp b/tests/unittest/LayoutTest.cpp
new file mode 100644
index 0000000..1770d3a
--- /dev/null
+++ b/tests/unittest/LayoutTest.cpp

@@ -0,0 +1,427 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "ICUTestBase.h"
+#include "minikin/FontCollection.h"
+#include "minikin/Layout.h"
+#include "../util/FontTestUtils.h"
+#include "../util/UnicodeUtils.h"
+
+// Font directory and font configuration XML handed to getFontCollection() when the LayoutTest
+// fixture builds its FontCollection.
+const char* SYSTEM_FONT_PATH = "/system/fonts/";
+const char* SYSTEM_FONT_XML = "/system/etc/fonts.xml";
+
+namespace minikin {
+
+// Sentinel that resetAdvances() stores in every advance slot; expectAdvances() checks that the
+// slot following the expected values still holds it.
+const float UNTOUCHED_MARKER = 1e+38;
+
+// Checks that the first expected.size() entries of advances equal expected, and that the entry
+// right after them (when the array has one) still holds UNTOUCHED_MARKER.
+// length is the number of elements available in advances.
+static void expectAdvances(const std::vector<float>& expected, const float* advances,
+        size_t length) {
+    // Fatal check: never index advances beyond its length when too many values are expected.
+    ASSERT_LE(expected.size(), length);
+    for (size_t i = 0; i < expected.size(); ++i) {
+        EXPECT_EQ(expected[i], advances[i])
+                << i << "th element is different. Expected: " << expected[i]
+                << ", Actual: " << advances[i];
+    }
+    // A sentinel slot exists only if the array is longer than the expected values; reading
+    // advances[length] would be one past the end.
+    if (expected.size() < length) {
+        EXPECT_EQ(UNTOUCHED_MARKER, advances[expected.size()]);
+    }
+}
+
+// Overwrites the first length entries of advances with UNTOUCHED_MARKER.
+static void resetAdvances(float* advances, size_t length) {
+    float* const end = advances + length;
+    for (float* slot = advances; slot != end; ++slot) {
+        *slot = UNTOUCHED_MARKER;
+    }
+}
+
+// Fixture for the Layout tests: builds the FontCollection that each test lays text out with.
+class LayoutTest : public ICUTestBase {
+protected:
+    LayoutTest() : mCollection(nullptr) {
+    }
+
+    virtual ~LayoutTest() {}
+
+    // Creates the collection from SYSTEM_FONT_PATH / SYSTEM_FONT_XML before every test.
+    // NOTE(review): this override does not call ICUTestBase::SetUp(); confirm that skipping the
+    // base-class setup is intended.
+    virtual void SetUp() override {
+        mCollection = std::shared_ptr<FontCollection>(
+                getFontCollection(SYSTEM_FONT_PATH, SYSTEM_FONT_XML));
+    }
+
+    // NOTE(review): likewise does not chain to any ICUTestBase::TearDown() -- confirm.
+    virtual void TearDown() override {
+    }
+
+    // Font collection passed to Layout::doLayout() by the tests.
+    std::shared_ptr<FontCollection> mCollection;
+};
+
+// Lays out several plain strings with a default paint and checks, for each one, the total
+// advance, the bounding box and the per-character advances.
+TEST_F(LayoutTest, doLayoutTest) {
+    struct TestCase {
+        const char* trace;   // label reported through SCOPED_TRACE
+        const char* utf8;    // text to lay out
+        float totalAdvance;  // expected advance and right edge of the bounds
+    };
+    // The mock implementation returns 10.0f advance and 0,0-10x10 bounds for every glyph.
+    const TestCase kTestCases[] = {
+        {"one word", "oneword", 70.0f},
+        {"two words", "two words", 90.0f},
+        {"three words", "three words test", 160.0f},
+        {"two spaces", "two  spaces", 110.0f},
+    };
+
+    MinikinPaint paint;
+    MinikinRect rect;
+    const size_t kMaxAdvanceLength = 32;
+    float advances[kMaxAdvanceLength];
+    std::vector<float> expectedValues;
+
+    Layout layout;
+    std::vector<uint16_t> text;
+
+    for (const TestCase& testCase : kTestCases) {
+        SCOPED_TRACE(testCase.trace);
+        text = utf8ToUtf16(testCase.utf8);
+        layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, FontStyle(), paint,
+                mCollection);
+        EXPECT_EQ(testCase.totalAdvance, layout.getAdvance());
+        layout.getBounds(&rect);
+        EXPECT_EQ(0.0f, rect.mLeft);
+        EXPECT_EQ(0.0f, rect.mTop);
+        EXPECT_EQ(testCase.totalAdvance, rect.mRight);
+        EXPECT_EQ(10.0f, rect.mBottom);
+        resetAdvances(advances, kMaxAdvanceLength);
+        layout.getAdvances(advances);
+        // Every character is expected to advance by the mock's 10.0f.
+        expectedValues.assign(text.size(), 10.0f);
+        expectAdvances(expectedValues, advances, kMaxAdvanceLength);
+    }
+}
+
+// Same checks as the plain layout test, but with a positive word spacing: every space character
+// is expected to advance by the glyph advance plus the spacing.
+TEST_F(LayoutTest, doLayoutTest_wordSpacing) {
+    struct TestCase {
+        const char* trace;                 // label reported through SCOPED_TRACE
+        const char* utf8;                  // text to lay out
+        float totalAdvance;                // expected advance and right edge of the bounds
+        std::vector<size_t> spaceIndices;  // characters whose advance includes the word spacing
+    };
+    // The mock implementation returns 10.0f advance and 0,0-10x10 bounds for every glyph.
+    const TestCase kTestCases[] = {
+        {"one word", "oneword", 70.0f, {}},
+        {"two words", "two words", 95.0f, {3}},
+        {"three words test", "three words test", 170.0f, {5, 11}},
+        {"two spaces", "two  spaces", 120.0f, {3, 4}},
+    };
+
+    MinikinPaint paint;
+    MinikinRect rect;
+    const size_t kMaxAdvanceLength = 32;
+    float advances[kMaxAdvanceLength];
+    std::vector<float> expectedValues;
+    std::vector<uint16_t> text;
+
+    Layout layout;
+
+    paint.wordSpacing = 5.0f;
+
+    for (const TestCase& testCase : kTestCases) {
+        SCOPED_TRACE(testCase.trace);
+        text = utf8ToUtf16(testCase.utf8);
+        layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, FontStyle(), paint,
+                mCollection);
+        EXPECT_EQ(testCase.totalAdvance, layout.getAdvance());
+        layout.getBounds(&rect);
+        EXPECT_EQ(0.0f, rect.mLeft);
+        EXPECT_EQ(0.0f, rect.mTop);
+        EXPECT_EQ(testCase.totalAdvance, rect.mRight);
+        EXPECT_EQ(10.0f, rect.mBottom);
+        resetAdvances(advances, kMaxAdvanceLength);
+        layout.getAdvances(advances);
+        expectedValues.assign(text.size(), 10.0f);
+        for (size_t index : testCase.spaceIndices) {
+            expectedValues[index] = 15.0f;  // 10.0f glyph advance + 5.0f word spacing
+        }
+        // expectAdvances() also verifies the untouched sentinel right after the last advance.
+        expectAdvances(expectedValues, advances, kMaxAdvanceLength);
+    }
+}
+
+// Same checks as the plain layout test, but with a negative word spacing: every space character
+// is expected to advance by the glyph advance minus the spacing magnitude.
+TEST_F(LayoutTest, doLayoutTest_negativeWordSpacing) {
+    struct TestCase {
+        const char* trace;                 // label reported through SCOPED_TRACE
+        const char* utf8;                  // text to lay out
+        float totalAdvance;                // expected advance and right edge of the bounds
+        std::vector<size_t> spaceIndices;  // characters whose advance includes the word spacing
+    };
+    const TestCase kTestCases[] = {
+        {"one word", "oneword", 70.0f, {}},
+        {"two words", "two words", 85.0f, {3}},
+        {"three words", "three word test", 140.0f, {5, 10}},
+        {"two spaces", "two  spaces", 100.0f, {3, 4}},
+    };
+
+    MinikinPaint paint;
+    MinikinRect rect;
+    const size_t kMaxAdvanceLength = 32;
+    float advances[kMaxAdvanceLength];
+    std::vector<float> expectedValues;
+
+    Layout layout;
+    std::vector<uint16_t> text;
+
+    // Negative word spacing also should work.
+    paint.wordSpacing = -5.0f;
+
+    for (const TestCase& testCase : kTestCases) {
+        SCOPED_TRACE(testCase.trace);
+        text = utf8ToUtf16(testCase.utf8);
+        layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, FontStyle(), paint,
+                mCollection);
+        EXPECT_EQ(testCase.totalAdvance, layout.getAdvance());
+        layout.getBounds(&rect);
+        EXPECT_EQ(0.0f, rect.mLeft);
+        EXPECT_EQ(0.0f, rect.mTop);
+        EXPECT_EQ(testCase.totalAdvance, rect.mRight);
+        EXPECT_EQ(10.0f, rect.mBottom);
+        resetAdvances(advances, kMaxAdvanceLength);
+        layout.getAdvances(advances);
+        expectedValues.assign(text.size(), 10.0f);
+        for (size_t index : testCase.spaceIndices) {
+            expectedValues[index] = 5.0f;  // 10.0f glyph advance - 5.0f word spacing
+        }
+        expectAdvances(expectedValues, advances, kMaxAdvanceLength);
+    }
+}
+
+// Lays the same text out with kBidi_LTR and with kBidi_RTL and checks that the RTL glyph
+// sequence is the LTR sequence in reverse order (same font and glyph id at mirrored positions).
+TEST_F(LayoutTest, doLayoutTest_rtlTest) {
+    MinikinPaint paint;
+
+    std::vector<uint16_t> text = parseUnicodeString("'a' 'b' U+3042 U+3043 'c' 'd'");
+
+    Layout ltrLayout;
+    ltrLayout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, FontStyle(), paint,
+            mCollection);
+
+    Layout rtlLayout;
+    rtlLayout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_RTL, FontStyle(), paint,
+            mCollection);
+
+    // Fatal checks: the loop below indexes both layouts using the LTR glyph count.
+    ASSERT_EQ(ltrLayout.nGlyphs(), rtlLayout.nGlyphs());
+    ASSERT_EQ(6u, ltrLayout.nGlyphs());
+
+    const size_t glyphCount = ltrLayout.nGlyphs();
+    size_t rtlIndex = glyphCount;
+    for (size_t ltrIndex = 0; ltrIndex < glyphCount; ++ltrIndex) {
+        --rtlIndex;  // mirrored position: glyphCount - ltrIndex - 1
+        EXPECT_EQ(ltrLayout.getFont(ltrIndex), rtlLayout.getFont(rtlIndex));
+        EXPECT_EQ(ltrLayout.getGlyphId(ltrIndex), rtlLayout.getGlyphId(rtlIndex));
+    }
+}
+
+// Checks how each hyphen edit setting on the paint changes the total advance of "oneword".
+TEST_F(LayoutTest, hyphenationTest) {
+    Layout layout;
+    std::vector<uint16_t> text;
+
+    // Lays out "oneword" with the given paint and returns the layout's total advance.
+    auto layoutOneWord = [&](MinikinPaint& paint) {
+        text = utf8ToUtf16("oneword");
+        layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, FontStyle(), paint,
+                mCollection);
+        return layout.getAdvance();
+    };
+
+    // The mock implementation returns 10.0f advance for all glyphs.
+    {
+        SCOPED_TRACE("one word with no hyphen edit");
+        MinikinPaint paint;
+        paint.hyphenEdit = HyphenEdit::NO_EDIT;
+        EXPECT_EQ(70.0f, layoutOneWord(paint));
+    }
+    {
+        SCOPED_TRACE("one word with hyphen insertion at the end");
+        MinikinPaint paint;
+        paint.hyphenEdit = HyphenEdit::INSERT_HYPHEN_AT_END;
+        EXPECT_EQ(80.0f, layoutOneWord(paint));
+    }
+    {
+        SCOPED_TRACE("one word with hyphen replacement at the end");
+        MinikinPaint paint;
+        paint.hyphenEdit = HyphenEdit::REPLACE_WITH_HYPHEN_AT_END;
+        EXPECT_EQ(70.0f, layoutOneWord(paint));
+    }
+    {
+        SCOPED_TRACE("one word with hyphen insertion at the start");
+        MinikinPaint paint;
+        paint.hyphenEdit = HyphenEdit::INSERT_HYPHEN_AT_START;
+        EXPECT_EQ(80.0f, layoutOneWord(paint));
+    }
+    {
+        SCOPED_TRACE("one word with hyphen insertion at the both ends");
+        MinikinPaint paint;
+        paint.hyphenEdit = HyphenEdit::INSERT_HYPHEN_AT_START | HyphenEdit::INSERT_HYPHEN_AT_END;
+        EXPECT_EQ(90.0f, layoutOneWord(paint));
+    }
+}
+
+// TODO: Add more test cases, e.g. measure text, letter spacing.
+
+}  // namespace minikin

diff --git a/tests/LayoutUtilsTest.cpp b/tests/unittest/LayoutUtilsTest.cpp
similarity index 99%
rename from tests/LayoutUtilsTest.cpp
rename to tests/unittest/LayoutUtilsTest.cpp
index f4fbb18..e7e6c27 100644
--- a/tests/LayoutUtilsTest.cpp
+++ b/tests/unittest/LayoutUtilsTest.cpp

@@ -19,7 +19,7 @@
 
 #include "LayoutUtils.h"
 
-namespace {
+namespace minikin {
 
 void ExpectNextWordBreakForCache(size_t offset_in, const char* query_str) {
     const size_t BUF_SIZE = 256U;
@@ -507,4 +507,4 @@
     ExpectPrevWordBreakForCache(1000, "U+4444 U+302D U+302D | U+4444");
 }
 
-}  // namespace
+}  // namespace minikin

diff --git a/tests/unittest/MeasurementTests.cpp b/tests/unittest/MeasurementTests.cpp
new file mode 100644
index 0000000..7fedecb
--- /dev/null
+++ b/tests/unittest/MeasurementTests.cpp

@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <UnicodeUtils.h>
+#include <minikin/Measurement.h>
+
+namespace minikin {
+
+// Parses src with ParseUnicode(), which reports the parsed length and an offset, and returns
+// getRunAdvance() over the whole parsed text at that offset.
+float getAdvance(const float* advances, const char* src) {
+    const size_t kBufSize = 256;
+    uint16_t text[kBufSize];
+    size_t cursor;
+    size_t length;
+    ParseUnicode(text, kBufSize, src, &length, &cursor);
+    return getRunAdvance(advances, text, 0, length, cursor);
+}
+
+// Latin fi: run advance at each offset, first with separate glyph advances, then with the whole
+// advance assigned to the first character.
+TEST(Measurement, getRunAdvance_fi) {
+    const float separateAdvances[] = {30.0f, 20.0f};
+    EXPECT_EQ(0.0f, getAdvance(separateAdvances, "| 'f' 'i'"));
+    EXPECT_EQ(30.0f, getAdvance(separateAdvances, "'f' | 'i'"));
+    EXPECT_EQ(50.0f, getAdvance(separateAdvances, "'f' 'i' |"));
+
+    const float ligatureAdvances[] = {40.0f, 0.0f};
+    EXPECT_EQ(0.0f, getAdvance(ligatureAdvances, "| 'f' 'i'"));
+    EXPECT_EQ(20.0f, getAdvance(ligatureAdvances, "'f' | 'i'"));
+    EXPECT_EQ(40.0f, getAdvance(ligatureAdvances, "'f' 'i' |"));
+}
+
+// Devanagari ka+virama+ka: run advance at each offset, first with separate advances for the two
+// ka letters, then with the whole advance assigned to the first character.
+TEST(Measurement, getRunAdvance_kka) {
+    const float separateAdvances[] = {30.0f, 0.0f, 30.0f};
+    EXPECT_EQ(0.0f, getAdvance(separateAdvances, "| U+0915 U+094D U+0915"));
+    EXPECT_EQ(30.0f, getAdvance(separateAdvances, "U+0915 | U+094D U+0915"));
+    EXPECT_EQ(30.0f, getAdvance(separateAdvances, "U+0915 U+094D | U+0915"));
+    EXPECT_EQ(60.0f, getAdvance(separateAdvances, "U+0915 U+094D U+0915 |"));
+
+    const float ligatureAdvances[] = {30.0f, 0.0f, 0.0f};
+    EXPECT_EQ(0.0f, getAdvance(ligatureAdvances, "| U+0915 U+094D U+0915"));
+    EXPECT_EQ(30.0f, getAdvance(ligatureAdvances, "U+0915 | U+094D U+0915"));
+    EXPECT_EQ(30.0f, getAdvance(ligatureAdvances, "U+0915 U+094D | U+0915"));
+    EXPECT_EQ(30.0f, getAdvance(ligatureAdvances, "U+0915 U+094D U+0915 |"));
+}
+
+}  // namespace minikin

diff --git a/tests/unittest/SparseBitSetTest.cpp b/tests/unittest/SparseBitSetTest.cpp
new file mode 100644
index 0000000..39c9e1b
--- /dev/null
+++ b/tests/unittest/SparseBitSetTest.cpp

@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <random>
+
+#include <gtest/gtest.h>
+#include <minikin/SparseBitSet.h>
+
+namespace minikin {
+
+// Builds a SparseBitSet from pseudo-random, non-decreasing range boundaries and checks get() for
+// every code point from 0 up to (but not including) 0x1FFFFFF.
+TEST(SparseBitSetTest, randomTest) {
+    const uint32_t kTestRangeNum = 4096;
+
+    std::mt19937 mt;  // Fix seeds to be able to reproduce the result.
+    std::uniform_int_distribution<uint16_t> distribution(1, 512);
+
+    // Boundaries are stored pairwise as (start, end); each one is at least the previous one.
+    std::vector<uint32_t> boundaries;
+    boundaries.push_back(distribution(mt));
+    while (boundaries.size() < kTestRangeNum * 2) {
+        boundaries.push_back((boundaries.back() - 1) + distribution(mt));
+    }
+
+    const size_t rangeCount = boundaries.size() / 2;
+    SparseBitSet bitset(boundaries.data(), rangeCount);
+
+    uint32_t ch = 0;
+    for (size_t rangeIndex = 0; rangeIndex < rangeCount; ++rangeIndex) {
+        const uint32_t start = boundaries[rangeIndex * 2];
+        const uint32_t end = boundaries[rangeIndex * 2 + 1];
+
+        // Gap before the range: nothing may be set.
+        while (ch < start) {
+            ASSERT_FALSE(bitset.get(ch)) << std::hex << ch;
+            ch++;
+        }
+        // Inside the range, end excluded: everything must be set.
+        while (ch < end) {
+            ASSERT_TRUE(bitset.get(ch)) << std::hex << ch;
+            ch++;
+        }
+    }
+    // Nothing may be set after the last range.
+    while (ch < 0x1FFFFFF) {
+        ASSERT_FALSE(bitset.get(ch)) << std::hex << ch;
+        ++ch;
+    }
+}
+
+}  // namespace minikin

diff --git a/tests/unittest/UnicodeUtilsTest.cpp b/tests/unittest/UnicodeUtilsTest.cpp
new file mode 100644
index 0000000..9932723
--- /dev/null
+++ b/tests/unittest/UnicodeUtilsTest.cpp

@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "UnicodeUtils.h"
+
+namespace minikin {
+
+// Parses a string mixing U+XXXX code points, a '|' marker and a quoted character, then checks
+// the reported size and offset and the UTF-16 code units written to the buffer.
+TEST(UnicodeUtils, parse) {
+    const size_t BUF_SIZE = 256;
+    uint16_t buf[BUF_SIZE];
+    size_t offset;
+    size_t size;
+    ParseUnicode(buf, BUF_SIZE, "U+000D U+1F431 | 'a'", &size, &offset);
+    // Expected value first, matching the other tests. Fatal on a size mismatch so that buffer
+    // slots the parser may not have written are never inspected.
+    ASSERT_EQ(4u, size);
+    EXPECT_EQ(3u, offset);
+    EXPECT_EQ(0x000D, buf[0]);
+    EXPECT_EQ(0xD83D, buf[1]);  // lead surrogate of U+1F431
+    EXPECT_EQ(0xDC31, buf[2]);  // trail surrogate of U+1F431
+    EXPECT_EQ('a', buf[3]);
+}
+
+} // namespace minikin

diff --git a/tests/WordBreakerTests.cpp b/tests/unittest/WordBreakerTests.cpp
similarity index 77%
rename from tests/WordBreakerTests.cpp
rename to tests/unittest/WordBreakerTests.cpp
index 8ed87cc..13e0420 100644
--- a/tests/WordBreakerTests.cpp
+++ b/tests/unittest/WordBreakerTests.cpp

@@ -14,7 +14,11 @@
  * limitations under the License.
  */
 
+#define LOG_TAG "Minikin"
+
+#include <android/log.h>
 #include <gtest/gtest.h>
+
 #include "ICUTestBase.h"
 #include "UnicodeUtils.h"
 #include <minikin/WordBreaker.h>
@@ -22,23 +26,20 @@
 #include <unicode/uclean.h>
 #include <unicode/udata.h>
 
-#define LOG_TAG "Minikin"
-#include <cutils/log.h>
-
 #ifndef NELEM
 #define NELEM(x) ((sizeof(x) / sizeof((x)[0])))
 #endif
 
 #define UTF16(codepoint) U16_LEAD(codepoint), U16_TRAIL(codepoint)
 
-using namespace android;
+namespace minikin {
 
 typedef ICUTestBase WordBreakerTest;
 
 TEST_F(WordBreakerTest, basic) {
     uint16_t buf[] = {'h', 'e', 'l', 'l' ,'o', ' ', 'w', 'o', 'r', 'l', 'd'};
     WordBreaker breaker;
-    breaker.setLocale(icu::Locale::getEnglish());
+    breaker.setLocale(icu::Locale::getUS());
     breaker.setText(buf, NELEM(buf));
     EXPECT_EQ(0, breaker.current());
     EXPECT_EQ(6, breaker.next());  // after "hello "
@@ -56,7 +57,7 @@
 TEST_F(WordBreakerTest, softHyphen) {
     uint16_t buf[] = {'h', 'e', 'l', 0x00AD, 'l' ,'o', ' ', 'w', 'o', 'r', 'l', 'd'};
     WordBreaker breaker;
-    breaker.setLocale(icu::Locale::getEnglish());
+    breaker.setLocale(icu::Locale::getUS());
     breaker.setText(buf, NELEM(buf));
     EXPECT_EQ(0, breaker.current());
     EXPECT_EQ(7, breaker.next());  // after "hel{SOFT HYPHEN}lo "
@@ -69,10 +70,23 @@
     EXPECT_EQ(0, breaker.breakBadness());
 }
 
+TEST_F(WordBreakerTest, hardHyphen) {
+    // Hyphens should not allow breaks anymore: the first break reported is the end of the text,
+    // and the whole buffer is a single word.
+    uint16_t buf[] = {'s', 'u', 'g', 'a', 'r', '-', 'f', 'r', 'e', 'e'};
+    const ssize_t bufLength = static_cast<ssize_t>(NELEM(buf));
+    WordBreaker breaker;
+    breaker.setLocale(icu::Locale::getUS());
+    breaker.setText(buf, NELEM(buf));
+    EXPECT_EQ(0, breaker.current());
+    EXPECT_EQ(bufLength, breaker.next());
+    EXPECT_EQ(0, breaker.wordStart());
+    EXPECT_EQ(bufLength, breaker.wordEnd());
+    EXPECT_EQ(0, breaker.breakBadness());
+}
+
+
 TEST_F(WordBreakerTest, postfixAndPrefix) {
     uint16_t buf[] = {'U', 'S', 0x00A2, ' ', 'J', 'P', 0x00A5}; // US¢ JP¥
     WordBreaker breaker;
-    breaker.setLocale(icu::Locale::getEnglish());
+    breaker.setLocale(icu::Locale::getUS());
     breaker.setText(buf, NELEM(buf));
     EXPECT_EQ(0, breaker.current());
 
@@ -85,7 +99,7 @@
     EXPECT_EQ((ssize_t)NELEM(buf), breaker.wordEnd());
 }
 
-TEST_F(WordBreakerTest, MyanmarKinzi) {
+TEST_F(WordBreakerTest, myanmarKinzi) {
     uint16_t buf[] = {0x1004, 0x103A, 0x1039, 0x1000, 0x102C};  // NGA, ASAT, VIRAMA, KA, UU
     WordBreaker breaker;
     icu::Locale burmese("my");
@@ -110,7 +124,7 @@
         UTF16(0x1F431), 0x200D, UTF16(0x1F464),
     };
     WordBreaker breaker;
-    breaker.setLocale(icu::Locale::getEnglish());
+    breaker.setLocale(icu::Locale::getUS());
     breaker.setText(buf, NELEM(buf));
     EXPECT_EQ(0, breaker.current());
     EXPECT_EQ(7, breaker.next());  // after man + zwj + heart + zwj + man
@@ -133,10 +147,10 @@
         0x270C, 0xFE0F, UTF16(0x1F3FF)  // victory hand + emoji style + type 6 fitzpatrick modifier
     };
     WordBreaker breaker;
-    breaker.setLocale(icu::Locale::getEnglish());
+    breaker.setLocale(icu::Locale::getUS());
     breaker.setText(buf, NELEM(buf));
     EXPECT_EQ(0, breaker.current());
-    EXPECT_EQ(4, breaker.next());  // after man + type 6 fitzpatrick modifier
+    EXPECT_EQ(4, breaker.next());  // after boy + type 1-2 fitzpatrick modifier
     EXPECT_EQ(0, breaker.wordStart());
     EXPECT_EQ(4, breaker.wordEnd());
     EXPECT_EQ((ssize_t)NELEM(buf), breaker.next());  // end
@@ -144,11 +158,108 @@
     EXPECT_EQ(8, breaker.wordEnd());
 }
 
+// Verifies that a word break is reported between adjacent emoji, including when a variation
+// selector (VS15 / VS16) follows the first emoji.
+TEST_F(WordBreakerTest, unicode10Emoji) {
+    // Should break between emojis.
+    // The expected offsets below rely on each UTF16(...) entry taking two code units
+    // (presumably a surrogate pair), for 16 code units in total.
+    uint16_t buf[] = {
+        // SLED + SLED
+        UTF16(0x1F6F7), UTF16(0x1F6F7),
+        // SLED + VS15 + SLED
+        UTF16(0x1F6F7), 0xFE0E, UTF16(0x1F6F7),
+        // WHITE SMILING FACE + SLED
+        0x263A, UTF16(0x1F6F7),
+        // WHITE SMILING FACE + VS16 + SLED
+        0x263A, 0xFE0F, UTF16(0x1F6F7),
+    };
+    WordBreaker breaker;
+    // Use the same locale as the other tests in this file.
+    breaker.setLocale(icu::Locale::getUS());
+    breaker.setText(buf, NELEM(buf));
+    EXPECT_EQ(0, breaker.current());
+
+    // SLED | SLED
+    EXPECT_EQ(2, breaker.next());
+    EXPECT_EQ(0, breaker.wordStart());
+    EXPECT_EQ(2, breaker.wordEnd());
+
+    EXPECT_EQ(4, breaker.next());
+    EXPECT_EQ(2, breaker.wordStart());
+    EXPECT_EQ(4, breaker.wordEnd());
+
+    // SLED + VS15 | SLED: the variation selector stays with the preceding emoji.
+    EXPECT_EQ(7, breaker.next());
+    EXPECT_EQ(4, breaker.wordStart());
+    EXPECT_EQ(7, breaker.wordEnd());
+
+    EXPECT_EQ(9, breaker.next());
+    EXPECT_EQ(7, breaker.wordStart());
+    EXPECT_EQ(9, breaker.wordEnd());
+
+    // WHITE SMILING FACE | SLED
+    EXPECT_EQ(10, breaker.next());
+    EXPECT_EQ(9, breaker.wordStart());
+    EXPECT_EQ(10, breaker.wordEnd());
+
+    EXPECT_EQ(12, breaker.next());
+    EXPECT_EQ(10, breaker.wordStart());
+    EXPECT_EQ(12, breaker.wordEnd());
+
+    // WHITE SMILING FACE + VS16 | SLED
+    EXPECT_EQ(14, breaker.next());
+    EXPECT_EQ(12, breaker.wordStart());
+    EXPECT_EQ(14, breaker.wordEnd());
+
+    EXPECT_EQ(16, breaker.next());
+    EXPECT_EQ(14, breaker.wordStart());
+    EXPECT_EQ(16, breaker.wordEnd());
+}
+
+// Two consecutive U+1F3F4 characters must be reported as two separate words.
+TEST_F(WordBreakerTest, flagsSequenceSingleFlag) {
+    // U+1F3F4 is outside the BMP, so one flag takes two UTF-16 code units.
+    constexpr int kUnitsPerFlag = 2;
+    constexpr size_t kCapacity = kUnitsPerFlag * 2;
+
+    const std::string singleFlag = "U+1F3F4";
+    const std::string twoFlags = singleFlag + " " + singleFlag;
+
+    uint16_t text[kCapacity];
+    size_t textLength;
+    ParseUnicode(text, kCapacity, twoFlags.c_str(), &textLength, nullptr);
+
+    WordBreaker breaker;
+    breaker.setLocale(icu::Locale::getUS());
+    breaker.setText(text, textLength);
+    EXPECT_EQ(0, breaker.current());
+
+    // First word: the first flag.
+    EXPECT_EQ(kUnitsPerFlag, breaker.next());
+    EXPECT_EQ(0, breaker.wordStart());
+    EXPECT_EQ(kUnitsPerFlag, breaker.wordEnd());
+
+    // Second word: the second flag, which runs to the end of the text.
+    EXPECT_EQ(static_cast<ssize_t>(textLength), breaker.next());
+    EXPECT_EQ(kUnitsPerFlag, breaker.wordStart());
+    EXPECT_EQ(kUnitsPerFlag * 2, breaker.wordEnd());
+}
+
+// Two consecutive emoji tag sequences must be reported as two separate words, without any
+// break inside a sequence.
+TEST_F(WordBreakerTest, flagsSequence) {
+    // U+1F3F4 U+E0067 U+E0062 U+E0073 U+E0063 U+E0074 U+E007F is emoji tag sequence for the flag
+    // of Scotland. All seven code points are outside the BMP: 7 * 2 = 14 UTF-16 code units.
+    constexpr int kUnitsPerSequence = 14;
+    constexpr size_t kCapacity = kUnitsPerSequence * 2;
+
+    const std::string oneSequence = "U+1F3F4 U+E0067 U+E0062 U+E0073 U+E0063 U+E0074 U+E007F";
+    const std::string twoSequences = oneSequence + " " + oneSequence;
+
+    uint16_t text[kCapacity];
+    size_t textLength;
+    ParseUnicode(text, kCapacity, twoSequences.c_str(), &textLength, nullptr);
+
+    WordBreaker breaker;
+    breaker.setLocale(icu::Locale::getUS());
+    breaker.setText(text, textLength);
+    EXPECT_EQ(0, breaker.current());
+
+    // First word: the whole first flag sequence.
+    EXPECT_EQ(kUnitsPerSequence, breaker.next());
+    EXPECT_EQ(0, breaker.wordStart());
+    EXPECT_EQ(kUnitsPerSequence, breaker.wordEnd());
+
+    // Second word: the whole second flag sequence, which runs to the end of the text.
+    EXPECT_EQ(static_cast<ssize_t>(textLength), breaker.next());
+    EXPECT_EQ(kUnitsPerSequence, breaker.wordStart());
+    EXPECT_EQ(kUnitsPerSequence * 2, breaker.wordEnd());
+}
+
 TEST_F(WordBreakerTest, punct) {
     uint16_t buf[] = {0x00A1, 0x00A1, 'h', 'e', 'l', 'l' ,'o', ',', ' ', 'w', 'o', 'r', 'l', 'd',
         '!', '!'};
     WordBreaker breaker;
-    breaker.setLocale(icu::Locale::getEnglish());
+    breaker.setLocale(icu::Locale::getUS());
     breaker.setText(buf, NELEM(buf));
     EXPECT_EQ(0, breaker.current());
     EXPECT_EQ(9, breaker.next());  // after "¡¡hello, "
@@ -165,7 +276,7 @@
     uint16_t buf[] = {'f', 'o', 'o', '@', 'e', 'x', 'a', 'm', 'p', 'l', 'e', '.', 'c', 'o', 'm',
         ' ', 'x'};
     WordBreaker breaker;
-    breaker.setLocale(icu::Locale::getEnglish());
+    breaker.setLocale(icu::Locale::getUS());
     breaker.setText(buf, NELEM(buf));
     EXPECT_EQ(0, breaker.current());
     EXPECT_EQ(11, breaker.next());  // after "foo@example"
@@ -184,7 +295,7 @@
     uint16_t buf[] = {'m', 'a', 'i', 'l', 't', 'o', ':', 'f', 'o', 'o', '@',
         'e', 'x', 'a', 'm', 'p', 'l', 'e', '.', 'c', 'o', 'm', ' ', 'x'};
     WordBreaker breaker;
-    breaker.setLocale(icu::Locale::getEnglish());
+    breaker.setLocale(icu::Locale::getUS());
     breaker.setText(buf, NELEM(buf));
     EXPECT_EQ(0, breaker.current());
     EXPECT_EQ(7, breaker.next());  // after "mailto:"
@@ -208,7 +319,7 @@
     uint16_t buf[] = {'f', 'o', 'o', '@', 'e', 'x', 'a', 'm', 'p', 'l', 'e', '.', 'c', 'o', 'm',
         0x4E00};
     WordBreaker breaker;
-    breaker.setLocale(icu::Locale::getEnglish());
+    breaker.setLocale(icu::Locale::getUS());
     breaker.setText(buf, NELEM(buf));
     EXPECT_EQ(0, breaker.current());
     EXPECT_EQ(11, breaker.next());  // after "foo@example"
@@ -227,7 +338,7 @@
     uint16_t buf[] = {'f', 'o', 'o', '@', 'e', 'x', 'a', 'm', 'p', 'l', 'e', '.', 'c', 'o', 'm',
         0x0303, ' ', 'x'};
     WordBreaker breaker;
-    breaker.setLocale(icu::Locale::getEnglish());
+    breaker.setLocale(icu::Locale::getUS());
     breaker.setText(buf, NELEM(buf));
     EXPECT_EQ(0, breaker.current());
     EXPECT_EQ(11, breaker.next());  // after "foo@example"
@@ -245,7 +356,7 @@
 TEST_F(WordBreakerTest, lonelyAt) {
     uint16_t buf[] = {'a', ' ', '@', ' ', 'b'};
     WordBreaker breaker;
-    breaker.setLocale(icu::Locale::getEnglish());
+    breaker.setLocale(icu::Locale::getUS());
     breaker.setText(buf, NELEM(buf));
     EXPECT_EQ(0, breaker.current());
     EXPECT_EQ(2, breaker.next());  // after "a "
@@ -265,7 +376,7 @@
     uint16_t buf[] = {'h', 't', 't', 'p', ':', '/', '/', 'e', 'x', 'a', 'm', 'p', 'l', 'e',
         '.', 'c', 'o', 'm', ' ', 'x'};
     WordBreaker breaker;
-    breaker.setLocale(icu::Locale::getEnglish());
+    breaker.setLocale(icu::Locale::getUS());
     breaker.setText(buf, NELEM(buf));
     EXPECT_EQ(0, breaker.current());
     EXPECT_EQ(5, breaker.next());  // after "http:"
@@ -291,7 +402,7 @@
     uint16_t buf[] = {'h', 't', 't', 'p', ':', '/', '/', 'a', '.', 'b', '/', '~', 'c', ',', 'd',
         '-', 'e', '?', 'f', '=', 'g', '&', 'h', '#', 'i', '%', 'j', '_', 'k', '/', 'l'};
     WordBreaker breaker;
-    breaker.setLocale(icu::Locale::getEnglish());
+    breaker.setLocale(icu::Locale::getUS());
     breaker.setText(buf, NELEM(buf));
     EXPECT_EQ(0, breaker.current());
     EXPECT_EQ(5, breaker.next());  // after "http:"
@@ -350,7 +461,7 @@
 TEST_F(WordBreakerTest, urlNoHyphenBreak) {
     uint16_t buf[] = {'h', 't', 't', 'p', ':', '/', '/', 'a', '-', '/', 'b'};
     WordBreaker breaker;
-    breaker.setLocale(icu::Locale::getEnglish());
+    breaker.setLocale(icu::Locale::getUS());
     breaker.setText(buf, NELEM(buf));
     EXPECT_EQ(0, breaker.current());
     EXPECT_EQ(5, breaker.next());  // after "http:"
@@ -366,7 +477,7 @@
 TEST_F(WordBreakerTest, urlEndsWithSlash) {
     uint16_t buf[] = {'h', 't', 't', 'p', ':', '/', '/', 'a', '/'};
     WordBreaker breaker;
-    breaker.setLocale(icu::Locale::getEnglish());
+    breaker.setLocale(icu::Locale::getUS());
     breaker.setText(buf, NELEM(buf));
     EXPECT_EQ(0, breaker.current());
     EXPECT_EQ(5, breaker.next());  // after "http:"
@@ -382,9 +493,11 @@
 TEST_F(WordBreakerTest, emailStartsWithSlash) {
     uint16_t buf[] = {'/', 'a', '@', 'b'};
     WordBreaker breaker;
-    breaker.setLocale(icu::Locale::getEnglish());
+    breaker.setLocale(icu::Locale::getUS());
     breaker.setText(buf, NELEM(buf));
     EXPECT_EQ(0, breaker.current());
     EXPECT_EQ((ssize_t)NELEM(buf), breaker.next());  // end
     EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd());
 }
+
+}  // namespace minikin

diff --git a/tests/unittest/how_to_run.txt b/tests/unittest/how_to_run.txt
new file mode 100644
index 0000000..20aa5ab
--- /dev/null
+++ b/tests/unittest/how_to_run.txt

@@ -0,0 +1,3 @@
+mmm -j8 frameworks/minikin/tests/unittest &&
+adb sync data &&
+adb shell /data/nativetest/minikin_tests/minikin_tests

diff --git a/tests/util/FileUtils.cpp b/tests/util/FileUtils.cpp
new file mode 100644
index 0000000..68cc45c
--- /dev/null
+++ b/tests/util/FileUtils.cpp

@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cutils/log.h>
+
+#include <stdio.h>
+#include <sys/stat.h>
+
+#include <string>
+#include <vector>
+
+// Reads the entire content of the file at filePath and returns it as a byte vector.
+// Failing to open, stat or completely read the file is fatal.
+std::vector<uint8_t> readWholeFile(const std::string& filePath) {
+    // Binary mode: the bytes are returned verbatim, so no text translation must take place.
+    FILE* fp = fopen(filePath.c_str(), "rb");
+    LOG_ALWAYS_FATAL_IF(fp == nullptr, "Failed to open %s", filePath.c_str());
+    struct stat st;
+    LOG_ALWAYS_FATAL_IF(fstat(fileno(fp), &st) != 0, "Failed to stat %s", filePath.c_str());
+
+    const size_t size = static_cast<size_t>(st.st_size);
+    std::vector<uint8_t> result(size);
+    const size_t readSize = fread(result.data(), 1, size, fp);
+    fclose(fp);
+    LOG_ALWAYS_FATAL_IF(readSize != size, "Failed to read %s", filePath.c_str());
+    return result;
+}

diff --git a/tests/UnicodeUtils.h b/tests/util/FileUtils.h
similarity index 84%
rename from tests/UnicodeUtils.h
rename to tests/util/FileUtils.h
index 4f1b06a..1e66d1b 100644
--- a/tests/UnicodeUtils.h
+++ b/tests/util/FileUtils.h

@@ -14,5 +14,5 @@
  * limitations under the License.
  */
 
- void ParseUnicode(uint16_t* buf, size_t buf_size, const char* src, size_t* result_size,
-        size_t* offset);
+std::vector<uint8_t> readWholeFile(const std::string& filePath);
+

diff --git a/tests/util/FontTestUtils.cpp b/tests/util/FontTestUtils.cpp
new file mode 100644
index 0000000..13360d4
--- /dev/null
+++ b/tests/util/FontTestUtils.cpp

@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define LOG_TAG "Minikin"
+
+#include <libxml/tree.h>
+#include <unistd.h>
+
+#include <log/log.h>
+
+#include "FontLanguage.h"
+#include "MinikinFontForTest.h"
+#include <minikin/FontCollection.h>
+#include <minikin/FontFamily.h>
+
+namespace minikin {
+
+// Builds the font families described by the XML file fontXml.
+//
+// Every <family> child of the root element yields one FontFamily; its optional "variant"
+// ("elegant" or "compact") and "lang" attributes are honored. Every <font> child of a family
+// yields one Font backed by MinikinFontForTest, loaded from fontDir + <text of the font node>.
+// Font files that are not readable are skipped with a warning. A file that cannot be parsed,
+// or a <font> without a weight attribute or file name, is fatal.
+std::vector<std::shared_ptr<FontFamily>> getFontFamilies(const char* fontDir, const char* fontXml) {
+    // Strings handed out by libxml2 (xmlGetProp, xmlNodeListGetString) must be released with
+    // xmlFree(). Holding them in a smart pointer frees them on every path, "continue" included.
+    struct XmlCharDeleter {
+        void operator()(xmlChar* str) const { xmlFree(str); }
+    };
+    using XmlString = std::unique_ptr<xmlChar, XmlCharDeleter>;
+
+    xmlDoc* doc = xmlReadFile(fontXml, NULL, 0);
+    LOG_ALWAYS_FATAL_IF(doc == nullptr, "Failed to parse %s", fontXml);
+    xmlNode* familySet = xmlDocGetRootElement(doc);
+    LOG_ALWAYS_FATAL_IF(familySet == nullptr, "%s has no root element", fontXml);
+
+    std::vector<std::shared_ptr<FontFamily>> families;
+    for (xmlNode* familyNode = familySet->children; familyNode; familyNode = familyNode->next) {
+        if (xmlStrcmp(familyNode->name, (const xmlChar*)"family") != 0) {
+            continue;
+        }
+
+        XmlString variantXmlch(xmlGetProp(familyNode, (const xmlChar*)"variant"));
+        int variant = VARIANT_DEFAULT;
+        if (variantXmlch) {
+            if (xmlStrcmp(variantXmlch.get(), (const xmlChar*)"elegant") == 0) {
+                variant = VARIANT_ELEGANT;
+            } else if (xmlStrcmp(variantXmlch.get(), (const xmlChar*)"compact") == 0) {
+                variant = VARIANT_COMPACT;
+            }
+        }
+
+        std::vector<Font> fonts;
+        for (xmlNode* fontNode = familyNode->children; fontNode; fontNode = fontNode->next) {
+            if (xmlStrcmp(fontNode->name, (const xmlChar*)"font") != 0) {
+                continue;
+            }
+
+            XmlString weightXmlch(xmlGetProp(fontNode, (const xmlChar*)"weight"));
+            LOG_ALWAYS_FATAL_IF(!weightXmlch, "<font> without weight attribute in %s", fontXml);
+            // The XML weight is divided by 100 before it is handed to FontStyle.
+            int weight = atoi((const char*)weightXmlch.get()) / 100;
+
+            // xmlStrcmp() accepts a null argument, so a missing style simply means "not italic".
+            XmlString styleXmlch(xmlGetProp(fontNode, (const xmlChar*)"style"));
+            bool italic = xmlStrcmp(styleXmlch.get(), (const xmlChar*)"italic") == 0;
+
+            // NOTE(review): "index" is looked up on the <family> node, not on this <font> node.
+            // Kept unchanged; confirm against the XML files used by the tests.
+            XmlString index(xmlGetProp(familyNode, (const xmlChar*)"index"));
+
+            XmlString fontFileName(xmlNodeListGetString(doc, fontNode->xmlChildrenNode, 1));
+            LOG_ALWAYS_FATAL_IF(!fontFileName, "<font> without file name in %s", fontXml);
+            std::string fontPath = fontDir + std::string((const char*)fontFileName.get());
+
+            if (access(fontPath.c_str(), R_OK) != 0) {
+                ALOGW("%s is not found.", fontPath.c_str());
+                continue;
+            }
+
+            std::shared_ptr<MinikinFont> minikinFont;
+            if (index == nullptr) {
+                minikinFont = std::make_shared<MinikinFontForTest>(fontPath);
+            } else {
+                minikinFont = std::make_shared<MinikinFontForTest>(
+                        fontPath, atoi((const char*)index.get()));
+            }
+            fonts.push_back(Font(minikinFont, FontStyle(weight, italic)));
+        }
+
+        XmlString lang(xmlGetProp(familyNode, (const xmlChar*)"lang"));
+        std::shared_ptr<FontFamily> family;
+        if (lang == nullptr) {
+            family = std::make_shared<FontFamily>(variant, std::move(fonts));
+        } else {
+            uint32_t langId = FontStyle::registerLanguageList(
+                    std::string((const char*)lang.get(), xmlStrlen(lang.get())));
+            family = std::make_shared<FontFamily>(langId, variant, std::move(fonts));
+        }
+        families.push_back(family);
+    }
+    xmlFreeDoc(doc);
+    return families;
+}
+// Wraps the families read from fontXml (see getFontFamilies) into a new FontCollection.
+std::shared_ptr<FontCollection> getFontCollection(const char* fontDir, const char* fontXml) {
+    std::vector<std::shared_ptr<FontFamily>> families = getFontFamilies(fontDir, fontXml);
+    return std::make_shared<FontCollection>(std::move(families));
+}
+
+}  // namespace minikin

diff --git a/tests/FontTestUtils.h b/tests/util/FontTestUtils.h
similarity index 60%
rename from tests/FontTestUtils.h
rename to tests/util/FontTestUtils.h
index 5258a76..dd5e586 100644
--- a/tests/FontTestUtils.h
+++ b/tests/util/FontTestUtils.h

@@ -19,14 +19,28 @@
 
 #include <minikin/FontCollection.h>
 
+#include <memory>
+
+namespace minikin {
+
 /**
- * Returns FontCollection from installed fonts.
+ * Returns list of FontFamily from installed fonts.
  *
- * This function reads /system/etc/fonts.xml and make font families and
- * collections of them. MinikinFontForTest is used for FontFamily creation.
+ * This function reads an XML file and makes font families.
  *
  * Caller must unref the returned pointer.
  */
-android::FontCollection* getFontCollection(const char* fontDir, const char* fontXml);
+std::vector<std::shared_ptr<FontFamily>> getFontFamilies(const char* fontDir, const char* fontXml);
 
+/**
+ * Returns FontCollection from installed fonts.
+ *
+ * This function reads an XML file and makes font families and collections of them.
+ * MinikinFontForTest is used for FontFamily creation.
+ *
+ * The collection is returned as a std::shared_ptr, so the caller does not release it manually.
+ */
+std::shared_ptr<FontCollection> getFontCollection(const char* fontDir, const char* fontXml);
+
+}  // namespace minikin
 #endif  // MINIKIN_FONT_TEST_UTILS_H

diff --git a/tests/util/MinikinFontForTest.cpp b/tests/util/MinikinFontForTest.cpp
new file mode 100644
index 0000000..723e86a
--- /dev/null
+++ b/tests/util/MinikinFontForTest.cpp

@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define LOG_TAG "Minikin"
+
+#include "MinikinFontForTest.h"
+
+#include <minikin/MinikinFont.h>
+
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <string>
+
+#include <log/log.h>
+
+namespace minikin {
+
+static int uniqueId = 0;  // TODO: make thread safe if necessary.
+
+// Maps the font file at font_path read-only into memory and records its size, font index and
+// variation settings. Each instance takes the next value of the file-level uniqueId counter.
+// Failing to open, stat or map the file is fatal.
+MinikinFontForTest::MinikinFontForTest(const std::string& font_path, int index,
+        const std::vector<FontVariation>& variations) :
+        MinikinFont(uniqueId++),
+        mFontPath(font_path),
+        mVariations(variations),
+        mFontIndex(index) {
+    int fd = open(font_path.c_str(), O_RDONLY);
+    LOG_ALWAYS_FATAL_IF(fd == -1, "Failed to open %s", font_path.c_str());
+    struct stat st = {};
+    LOG_ALWAYS_FATAL_IF(fstat(fd, &st) != 0, "Failed to stat %s", font_path.c_str());
+    mFontSize = st.st_size;
+    mFontData = mmap(NULL, mFontSize, PROT_READ, MAP_SHARED, fd, 0);
+    // mmap() reports failure with MAP_FAILED, not with a null pointer.
+    LOG_ALWAYS_FATAL_IF(mFontData == MAP_FAILED, "Failed to mmap %s", font_path.c_str());
+    // The mapping stays valid after the descriptor is closed.
+    close(fd);
+}
+
+// Unmaps the font data (mFontSize bytes starting at mFontData).
+MinikinFontForTest::~MinikinFontForTest() {
+    munmap(mFontData, mFontSize);
+}
+
+// Mock implementation: returns the same fixed advance for every glyph and paint.
+float MinikinFontForTest::GetHorizontalAdvance(uint32_t /* glyph_id */,
+        const MinikinPaint& /* paint */) const {
+    // TODO: Make mock value configurable if necessary.
+    constexpr float kMockAdvance = 10.0f;
+    return kMockAdvance;
+}
+
+// Mock implementation: writes the same fixed box, from (0, 0) to (10, 10), into bounds for
+// every glyph and paint.
+void MinikinFontForTest::GetBounds(MinikinRect* bounds, uint32_t /* glyph_id */,
+        const MinikinPaint& /* paint */) const {
+    // TODO: Make mock values configurable if necessary.
+    constexpr float kMockOrigin = 0.0f;
+    constexpr float kMockExtent = 10.0f;
+    bounds->mRight = kMockExtent;
+    bounds->mBottom = kMockExtent;
+    bounds->mLeft = kMockOrigin;
+    bounds->mTop = kMockOrigin;
+}
+
+// Returns a new MinikinFontForTest for the same font file and font index as this one, using
+// the given variation settings instead of this font's own.
+std::shared_ptr<MinikinFont> MinikinFontForTest::createFontWithVariation(
+        const std::vector<FontVariation>& variations) const {
+    // make_shared avoids the raw "new" and allocates object and control block together.
+    return std::make_shared<MinikinFontForTest>(mFontPath, mFontIndex, variations);
+}
+
+}  // namespace minikin

diff --git a/tests/util/MinikinFontForTest.h b/tests/util/MinikinFontForTest.h
new file mode 100644
index 0000000..6e230e1
--- /dev/null
+++ b/tests/util/MinikinFontForTest.h

@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINIKIN_TEST_MINIKIN_FONT_FOR_TEST_H
+#define MINIKIN_TEST_MINIKIN_FONT_FOR_TEST_H
+
+#include <minikin/MinikinFont.h>
+
+class SkTypeface;
+
+namespace minikin {
+
+// MinikinFont implementation used by the tests. It holds the path, font index and variation
+// settings it was created with, exposes the font file content through GetFontData() /
+// GetFontSize(), and is neither default-constructible nor copyable.
+class MinikinFontForTest : public MinikinFont {
+public:
+    // font_path:  path of the font file.
+    // index:      font index, reported by GetFontIndex(); 0 when omitted.
+    // variations: variation settings, reported by GetAxes(); empty when omitted.
+    MinikinFontForTest(const std::string& font_path, int index,
+            const std::vector<FontVariation>& variations);
+    MinikinFontForTest(const std::string& font_path, int index)
+            : MinikinFontForTest(font_path, index, std::vector<FontVariation>()) {}
+    MinikinFontForTest(const std::string& font_path) : MinikinFontForTest(font_path, 0) {}
+    virtual ~MinikinFontForTest();
+
+    // MinikinFont overrides.
+    float GetHorizontalAdvance(uint32_t glyph_id, const MinikinPaint &paint) const;
+    void GetBounds(MinikinRect* bounds, uint32_t glyph_id,
+            const MinikinPaint& paint) const;
+
+    // Path this font was created from.
+    const std::string& fontPath() const { return mFontPath; }
+
+    // Start and size in bytes of the font data.
+    const void* GetFontData() const { return mFontData; }
+    size_t GetFontSize() const { return mFontSize; }
+    int GetFontIndex() const { return mFontIndex; }
+    const std::vector<minikin::FontVariation>& GetAxes() const { return mVariations; }
+    // Creates another font from the same path and index with the given variation settings.
+    std::shared_ptr<MinikinFont> createFontWithVariation(
+            const std::vector<FontVariation>& variations) const;
+private:
+    MinikinFontForTest() = delete;
+    MinikinFontForTest(const MinikinFontForTest&) = delete;
+    MinikinFontForTest& operator=(MinikinFontForTest&) = delete;
+
+    const std::string mFontPath;
+    const std::vector<FontVariation> mVariations;
+    const int mFontIndex;
+    // Font data and its size in bytes.
+    void* mFontData;
+    size_t mFontSize;
+};
+
+}  // namespace minikin
+
+#endif  // MINIKIN_TEST_MINIKIN_FONT_FOR_TEST_H

diff --git a/tests/util/UnicodeUtils.cpp b/tests/util/UnicodeUtils.cpp
new file mode 100644
index 0000000..e66ff93
--- /dev/null
+++ b/tests/util/UnicodeUtils.cpp

@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <unicode/utf.h>
+#include <unicode/utf8.h>
+#include <cstdlib>
+#include <cutils/log.h>
+#include <vector>
+#include <string>
+
+namespace minikin {
+
+// Parses src, a textual description of a UTF-16 string, into buf.
+//
+// src is of the form "U+1F431 | 'h' 'i'" and is made of these tokens:
+//   U+XXXX  a code point written with 4 to 6 hex digits ("u+" is accepted too); code points
+//           that need two UTF-16 code units are stored as a lead/trail pair.
+//   'c'     a single ASCII character.
+//   |       at most once; the number of code units written so far gets saved to offset.
+//   ' '     separator, ignored.
+//
+// buf, buf_size: output buffer and its capacity in 16-bit code units.
+// result_size:   receives the number of code units written; must not be null. Size is returned
+//                in an out parameter because gtest needs a void return for ASSERT to work.
+// offset:        receives the position of "|"; must be non-null exactly when src contains "|".
+//
+// Malformed input, a too small buffer or a violated precondition is reported through
+// LOG_ALWAYS_FATAL / LOG_ALWAYS_FATAL_IF.
+void ParseUnicode(uint16_t* buf, size_t buf_size, const char* src, size_t* result_size,
+        size_t* offset) {
+    size_t input_ix = 0;
+    size_t output_ix = 0;
+    bool seen_offset = false;
+
+    while (src[input_ix] != 0) {
+        switch (src[input_ix]) {
+        case '\'':
+            // single ASCII char
+            // Skip the opening quote first, so that the ASCII check below inspects the quoted
+            // character rather than the quote itself.
+            input_ix++;
+            LOG_ALWAYS_FATAL_IF(src[input_ix] == 0);
+            LOG_ALWAYS_FATAL_IF(static_cast<uint8_t>(src[input_ix]) >= 0x80);
+            LOG_ALWAYS_FATAL_IF(output_ix >= buf_size);
+            buf[output_ix++] = (uint16_t)src[input_ix++];
+            LOG_ALWAYS_FATAL_IF(src[input_ix] != '\'');
+            input_ix++;
+            break;
+        case 'u':
+        case 'U': {
+            // Unicode codepoint in hex syntax
+            input_ix++;
+            LOG_ALWAYS_FATAL_IF(src[input_ix] != '+');
+            input_ix++;
+            char* endptr = (char*)src + input_ix;
+            unsigned long int codepoint = strtoul(src + input_ix, &endptr, 16);
+            // Number of characters consumed by strtoul.
+            size_t num_hex_digits = endptr - (src + input_ix);
+
+            // also triggers on invalid number syntax, digits = 0
+            LOG_ALWAYS_FATAL_IF(num_hex_digits < 4u);
+            LOG_ALWAYS_FATAL_IF(num_hex_digits > 6u);
+            LOG_ALWAYS_FATAL_IF(codepoint > 0x10FFFFu);
+            input_ix += num_hex_digits;
+            if (U16_LENGTH(codepoint) == 1) {
+                LOG_ALWAYS_FATAL_IF(output_ix + 1 > buf_size);
+                buf[output_ix++] = codepoint;
+            } else {
+                // UTF-16 encoding
+                LOG_ALWAYS_FATAL_IF(output_ix + 2 > buf_size);
+                buf[output_ix++] = U16_LEAD(codepoint);
+                buf[output_ix++] = U16_TRAIL(codepoint);
+            }
+            break;
+        }
+        case ' ':
+            input_ix++;
+            break;
+        case '|':
+            // Only one offset marker is allowed, and the caller must have asked for it.
+            LOG_ALWAYS_FATAL_IF(seen_offset);
+            LOG_ALWAYS_FATAL_IF(offset == nullptr);
+            *offset = output_ix;
+            seen_offset = true;
+            input_ix++;
+            break;
+        default:
+            LOG_ALWAYS_FATAL("Unexpected Character");
+        }
+    }
+    LOG_ALWAYS_FATAL_IF(result_size == nullptr);
+    *result_size = output_ix;
+    // A caller that asked for an offset must have provided a "|" in src.
+    LOG_ALWAYS_FATAL_IF(!seen_offset && offset != nullptr);
+}
+
+// Parses in (same syntax as ParseUnicode) and returns the resulting UTF-16 code units.
+// offset is forwarded to ParseUnicode: it receives the position of "|" and must be non-null
+// exactly when in contains "|".
+std::vector<uint16_t> parseUnicodeStringWithOffset(const std::string& in, size_t* offset) {
+    // Parse straight into the result. in.size() code units is used as the capacity, as every
+    // code unit written by ParseUnicode comes from several input characters.
+    std::vector<uint16_t> result(in.size());
+    size_t result_size = 0;
+    ParseUnicode(result.data(), result.size(), in.c_str(), &result_size, offset);
+    result.resize(result_size);
+    return result;
+}
+
+// Same as parseUnicodeStringWithOffset, for input that carries no "|" offset marker.
+std::vector<uint16_t> parseUnicodeString(const std::string& in) {
+    return parseUnicodeStringWithOffset(in, nullptr);
+}
+
+// Converts UTF-8 text to UTF-16 code units. Every ill-formed UTF-8 sequence is converted to
+// U+FFFD REPLACEMENT CHARACTER.
+std::vector<uint16_t> utf8ToUtf16(const std::string& text) {
+    const char* src = text.c_str();
+    const int32_t textLength = static_cast<int32_t>(text.size());
+
+    std::vector<uint16_t> result;
+    // UTF-16 never needs more code units than UTF-8 needs bytes.
+    result.reserve(text.size());
+
+    int32_t i = 0;
+    while (i < textLength) {
+        // Must be signed: U8_NEXT reports an ill-formed sequence with a negative value.
+        UChar32 c = 0;
+        U8_NEXT(src, i, textLength, c);
+        if (c < 0) {
+            c = 0xFFFD;
+        }
+        if (U16_LENGTH(c) == 1) {
+            result.push_back(c);
+        } else {
+            result.push_back(U16_LEAD(c));
+            result.push_back(U16_TRAIL(c));
+        }
+    }
+    return result;
+}
+
+}  // namespace minikin

diff --git a/tests/UnicodeUtils.h b/tests/util/UnicodeUtils.h
similarity index 62%
copy from tests/UnicodeUtils.h
copy to tests/util/UnicodeUtils.h
index 4f1b06a..6ce2fcb 100644
--- a/tests/UnicodeUtils.h
+++ b/tests/util/UnicodeUtils.h

@@ -14,5 +14,15 @@
  * limitations under the License.
  */
 
- void ParseUnicode(uint16_t* buf, size_t buf_size, const char* src, size_t* result_size,
+namespace minikin {
+
+void ParseUnicode(uint16_t* buf, size_t buf_size, const char* src, size_t* result_size,
         size_t* offset);
+
+std::vector<uint16_t> parseUnicodeStringWithOffset(const std::string& in, size_t* offset);
+std::vector<uint16_t> parseUnicodeString(const std::string& in);
+
+// Converts UTF-8 to UTF-16.
+std::vector<uint16_t> utf8ToUtf16(const std::string& text);
+
+}  // namespace minikin

diff --git a/tools/mk_hyb_file.py b/tools/mk_hyb_file.py
index 978c082..a9b8932 100755
--- a/tools/mk_hyb_file.py
+++ b/tools/mk_hyb_file.py

@@ -539,6 +539,12 @@
     patterns = []
     exceptions = []
     traverse_trie(0, '', trie_data, ch_map, pattern_data, patterns, exceptions)
+
+    # EXCEPTION for Bulgarian (bg), which contains an ineffectual line of <0, U+044C, 0>
+    if u'\u044c' in patterns:
+        patterns.remove(u'\u044c')
+        patterns.append(u'0\u044c0')
+
     assert verify_file_sorted(patterns, pat_fn), 'pattern table not verified'
     assert verify_file_sorted(exceptions, hyp_fn), 'exception table not verified'
commit	d6d6828f166a3d01c61451e453ef3169b98ee2f5	[log] [tgz]
author	Seigo Nonaka <nona@google.com>	Wed Jul 12 19:18:31 2017 +0000
committer	android-build-merger <android-build-merger@google.com>	Wed Jul 12 19:18:31 2017 +0000
tree	7fd2dfe27fb8241ad9ef610a37ece5e1d43188c4
parent	d60d6464fa1815ac452a0c02a63f992b9f98e74b [diff]
parent	fa5c0e9773d282439272c2338d53ad517c7af42d [diff]