onCharsToGlyphs to handle non-bmp on Mac.

CTFontGetGlyphsForCharacters is a strange API for non-bmp code points.

R=caryclark@google.com

Review URL: https://codereview.chromium.org/43463005

git-svn-id: http://skia.googlecode.com/svn/trunk/src@11965 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/ports/SkFontHost_mac.cpp b/ports/SkFontHost_mac.cpp
index 0b77aa2..fb56119 100755
--- a/ports/SkFontHost_mac.cpp
+++ b/ports/SkFontHost_mac.cpp
@@ -946,19 +946,16 @@
 
 uint16_t SkScalerContext_Mac::generateCharToGlyph(SkUnichar uni) {
     CGGlyph cgGlyph[2];
-    UniChar theChar[2];
+    UniChar theChar[2]; // UniChar is a UTF-16 16-bit code unit.
 
     // Get the glyph
     size_t numUniChar = SkUTF16_FromUnichar(uni, theChar);
     SkASSERT(sizeof(CGGlyph) <= sizeof(uint16_t));
 
-    // Undocumented behavior of CTFontGetGlyphsForCharacters with non-bmp code points.
-    // When a surragate pair is detected, the glyph index used is the index of the first
-    // UniChar of the pair (the lower location).
-    if (!CTFontGetGlyphsForCharacters(fCTFont, theChar, cgGlyph, numUniChar)) {
-        cgGlyph[0] = 0;
-    }
-
+    // Undocumented behavior of CTFontGetGlyphsForCharacters with non-bmp code points:
+    // When a surrogate pair is detected, the glyph index used is the index of the high surrogate.
+    // It is documented that if a mapping is unavailable, the glyph will be set to 0.
+    CTFontGetGlyphsForCharacters(fCTFont, theChar, cgGlyph, numUniChar);
     return cgGlyph[0];
 }
 
@@ -1920,53 +1917,89 @@
 }
 
 int SkTypeface_Mac::onCharsToGlyphs(const void* chars, Encoding encoding,
-                                    uint16_t glyphs[], int glyphCount) const {
-    // UniChar is utf16
+                                    uint16_t glyphs[], int glyphCount) const
+{
+    // Undocumented behavior of CTFontGetGlyphsForCharacters with non-bmp code points:
+    // When a surrogate pair is detected, the glyph index used is the index of the high surrogate.
+    // It is documented that if a mapping is unavailable, the glyph will be set to 0.
+    
     SkAutoSTMalloc<1024, UniChar> charStorage;
-    const UniChar* src;
+    const UniChar* src; // UniChar is a UTF-16 16-bit code unit.
+    int srcCount;
     switch (encoding) {
         case kUTF8_Encoding: {
-            const char* u8 = (const char*)chars;
-            const UniChar* u16 = src = charStorage.reset(2 * glyphCount);
+            const char* utf8 = reinterpret_cast<const char*>(chars);
+            UniChar* utf16 = charStorage.reset(2 * glyphCount);
+            src = utf16;
             for (int i = 0; i < glyphCount; ++i) {
-                SkUnichar uni = SkUTF8_NextUnichar(&u8);
-                int n = SkUTF16_FromUnichar(uni, (uint16_t*)u16);
-                u16 += n;
+                SkUnichar uni = SkUTF8_NextUnichar(&utf8);
+                utf16 += SkUTF16_FromUnichar(uni, utf16);
             }
+            srcCount = utf16 - src;
             break;
         }
-        case kUTF16_Encoding:
-            src = (const UniChar*)chars;
-            break;
-        case kUTF32_Encoding: {
-            const SkUnichar* u32 = (const SkUnichar*)chars;
-            const UniChar* u16 = src = charStorage.reset(2 * glyphCount);
+        case kUTF16_Encoding: {
+            src = reinterpret_cast<const UniChar*>(chars);
+            int extra = 0;
             for (int i = 0; i < glyphCount; ++i) {
-                int n = SkUTF16_FromUnichar(u32[i], (uint16_t*)u16);
-                u16 += n;
+                if (SkUTF16_IsHighSurrogate(src[i + extra])) {
+                    ++extra;
+                }
             }
+            srcCount = glyphCount + extra;
+            break;
+        }
+        case kUTF32_Encoding: {
+            const SkUnichar* utf32 = reinterpret_cast<const SkUnichar*>(chars);
+            UniChar* utf16 = charStorage.reset(2 * glyphCount);
+            src = utf16;
+            for (int i = 0; i < glyphCount; ++i) {
+                utf16 += SkUTF16_FromUnichar(utf32[i], utf16);
+            }
+            srcCount = utf16 - src;
             break;
         }
     }
 
-    // Our caller may not want glyphs for output, but we need to give that
-    // storage to CT, so we can walk it looking for the first non-zero.
+    // If glyphs is NULL, CT still needs glyph storage for finding the first failure.
+    // Also, if there are any non-bmp code points, the provided 'glyphs' storage will be inadequate.
     SkAutoSTMalloc<1024, uint16_t> glyphStorage;
     uint16_t* macGlyphs = glyphs;
-    if (NULL == macGlyphs) {
-        macGlyphs = glyphStorage.reset(glyphCount);
+    if (NULL == macGlyphs || srcCount > glyphCount) {
+        macGlyphs = glyphStorage.reset(srcCount);
     }
 
-    if (CTFontGetGlyphsForCharacters(fFontRef, src, macGlyphs, glyphCount)) {
+    bool allEncoded = CTFontGetGlyphsForCharacters(fFontRef, src, macGlyphs, srcCount);
+
+    // If there were any non-bmp, then copy and compact.
+    // If 'glyphs' is NULL, then compact glyphStorage in-place.
+    // If all are bmp and 'glyphs' is non-NULL, 'glyphs' already contains the compact glyphs.
+    // If some are non-bmp and 'glyphs' is non-NULL, copy and compact into 'glyphs'.
+    uint16_t* compactedGlyphs = glyphs;
+    if (NULL == compactedGlyphs) {
+        compactedGlyphs = macGlyphs;
+    }
+    if (srcCount > glyphCount) {
+        int extra = 0;
+        for (int i = 0; i < glyphCount; ++i) {
+            if (SkUTF16_IsHighSurrogate(src[i + extra])) {
+                ++extra;
+            }
+            compactedGlyphs[i] = macGlyphs[i + extra];
+        }
+    }
+
+    if (allEncoded) {
         return glyphCount;
     }
-    // If we got false, then we need to manually look for first failure
+
+    // If we got false, then we need to manually look for first failure.
     for (int i = 0; i < glyphCount; ++i) {
-        if (0 == macGlyphs[i]) {
+        if (0 == compactedGlyphs[i]) {
             return i;
         }
     }
-    // odd to get here, as we expected CT to have returned true up front.
+    // Odd to get here, as we expected CT to have returned true up front.
     return glyphCount;
 }