Add nbsp info to the cache

Change-Id: I9d4cd6f3d7c2871c1279c3a7190187bbfee1b56b
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/383856
Commit-Queue: Julia Lavrova <jlavrova@google.com>
Reviewed-by: Ben Wagner <bungeman@google.com>
diff --git a/modules/skparagraph/samples/SampleParagraph.cpp b/modules/skparagraph/samples/SampleParagraph.cpp
index 134b486..7c63af4 100644
--- a/modules/skparagraph/samples/SampleParagraph.cpp
+++ b/modules/skparagraph/samples/SampleParagraph.cpp
@@ -2575,7 +2575,7 @@
                          cluster.textRange().start, cluster.textRange().end,
                          cluster.isSoftBreak() ? "soft" :
                          cluster.isHardBreak() ? "hard" :
-                         cluster.isWhitespaces() ? "spaces" : "");
+                         cluster.isWhitespaceBreak() ? "spaces" : "");
             }
 
             auto lines = impl->lines();
diff --git a/modules/skparagraph/src/ParagraphImpl.cpp b/modules/skparagraph/src/ParagraphImpl.cpp
index 3a6b1da..785a87b 100644
--- a/modules/skparagraph/src/ParagraphImpl.cpp
+++ b/modules/skparagraph/src/ParagraphImpl.cpp
@@ -28,11 +28,6 @@
 
 namespace {
 
-static inline SkUnichar nextUtf8Unit(const char** ptr, const char* end) {
-    SkUnichar val = SkUTF::NextUTF8(ptr, end);
-    return val < 0 ? 0xFFFD : val;
-}
-
 SkScalar littleRound(SkScalar a) {
     // This rounding is done to match Flutter tests. Must be removed..
     auto val = std::fabs(a);
@@ -270,14 +265,20 @@
         return false;
     }
 
-    // Get white spaces
-    std::vector<SkUnicode::Position> whitespaces;
-    if (!fUnicode->getWhitespaces(fText.c_str(), fText.size(), &whitespaces)) {
-        return false;
-    }
-    for (auto whitespace : whitespaces) {
-        fCodeUnitProperties[whitespace] |= CodeUnitFlags::kPartOfWhiteSpace;
-    }
+    // Get all spaces
+    fUnicode->forEachCodepoint(fText.c_str(), fText.size(),
+       [this](SkUnichar unichar, int32_t start, int32_t end) {
+            if (fUnicode->isWhitespace(unichar)) {
+                for (auto i = start; i < end; ++i) {
+                    fCodeUnitProperties[i] |=  CodeUnitFlags::kPartOfWhiteSpaceBreak;
+                }
+            }
+            if (fUnicode->isSpace(unichar)) {
+                for (auto i = start; i < end; ++i) {
+                    fCodeUnitProperties[i] |=  CodeUnitFlags::kPartOfIntraWordBreak;
+                }
+            }
+       });
 
     // Get line breaks
     std::vector<SkUnicode::LineBreakBefore> lineBreaks;
@@ -376,7 +377,7 @@
 
             // Process word spacing
             if (currentStyle->fStyle.getWordSpacing() != 0) {
-                if (cluster->isWhitespaces() && cluster->isSoftBreak()) {
+                if (cluster->isWhitespaceBreak() && cluster->isSoftBreak()) {
                     if (!soFarWhitespacesOnly) {
                         shift += run.addSpacesAtTheEnd(currentStyle->fStyle.getWordSpacing(), cluster);
                     }
@@ -387,7 +388,7 @@
                 shift += run.addSpacesEvenly(currentStyle->fStyle.getLetterSpacing(), cluster);
             }
 
-            if (soFarWhitespacesOnly && !cluster->isWhitespaces()) {
+            if (soFarWhitespacesOnly && !cluster->isWhitespaceBreak()) {
                 soFarWhitespacesOnly = false;
             }
         });
@@ -716,66 +717,6 @@
     return { SkToU32(start), SkToU32(end) };
 }
 
-void ParagraphImpl::forEachCodeUnitPropertyRange(CodeUnitFlags property, CodeUnitRangeVisitor visitor) {
-
-    size_t first = 0;
-    for (size_t i = 1; i < fText.size(); ++i) {
-        auto properties = fCodeUnitProperties[i];
-        if (properties & property) {
-            visitor({first, i});
-            first = i;
-        }
-
-    }
-    visitor({first, fText.size()});
-}
-
-size_t ParagraphImpl::getWhitespacesLength(TextRange textRange) {
-    size_t len = 0;
-    for (auto i = textRange.start; i < textRange.end; ++i) {
-        auto properties = fCodeUnitProperties[i];
-        if (properties & CodeUnitFlags::kPartOfWhiteSpace) {
-            ++len;
-        }
-    }
-    return len;
-}
-
-static bool is_ascii_7bit_space(int c) {
-    SkASSERT(c >= 0 && c <= 127);
-
-    // Extracted from https://en.wikipedia.org/wiki/Whitespace_character
-    //
-    enum WS {
-        kHT    = 9,
-        kLF    = 10,
-        kVT    = 11,
-        kFF    = 12,
-        kCR    = 13,
-        kSP    = 32,    // too big to use as shift
-    };
-#define M(shift)    (1 << (shift))
-    constexpr uint32_t kSpaceMask = M(kHT) | M(kLF) | M(kVT) | M(kFF) | M(kCR);
-    // we check for Space (32) explicitly, since it is too large to shift
-    return (c == kSP) || (c <= 31 && (kSpaceMask & M(c)));
-#undef M
-}
-
-bool ParagraphImpl::isSpace(TextRange textRange) {
-    auto text = ParagraphImpl::text(textRange);
-    const char* ch = text.begin();
-    if (text.end() - ch == 1 && *(unsigned char*)ch <= 0x7F) {
-        return is_ascii_7bit_space(*ch);
-    }
-    while (ch != text.end()) {
-        SkUnichar unicode = nextUtf8Unit(&ch, text.end());
-        if (!fUnicode->isSpace(unicode)) {
-            return false;
-        }
-    }
-    return true;
-}
-
 void ParagraphImpl::getLineMetrics(std::vector<LineMetrics>& metrics) {
     metrics.clear();
     for (auto& line : fLines) {
diff --git a/modules/skparagraph/src/ParagraphImpl.h b/modules/skparagraph/src/ParagraphImpl.h
index 84fa98e..555b50e 100644
--- a/modules/skparagraph/src/ParagraphImpl.h
+++ b/modules/skparagraph/src/ParagraphImpl.h
@@ -37,11 +37,12 @@
 namespace textlayout {
 
 enum CodeUnitFlags {
-    kNoCodeUnitFlag = 0x0,
-    kPartOfWhiteSpace = 0x1,
-    kGraphemeStart = 0x2,
-    kSoftLineBreakBefore = 0x4,
-    kHardLineBreakBefore = 0x8,
+    kNoCodeUnitFlag = 0x00,         // no bits set
+    kPartOfWhiteSpaceBreak = 0x01,  // set on every code unit of a code point for which SkUnicode::isWhitespace() is true
+    kGraphemeStart = 0x02,          // presumably: a grapheme starts at this code unit (tested at a text range's end in Run.cpp) - confirm
+    kSoftLineBreakBefore = 0x04,    // presumably: a soft line break is allowed before this code unit - confirm
+    kHardLineBreakBefore = 0x08,    // Cluster sets fIsHardBreak when the code unit at its text range end carries this bit
+    kPartOfIntraWordBreak = 0x10,   // set on every code unit of a code point for which SkUnicode::isSpace() is true
 };
 }  // namespace textlayout
 }  // namespace skia
@@ -217,11 +218,6 @@
         }
     }
 
-    using CodeUnitRangeVisitor = std::function<bool(TextRange textRange)>;
-    void forEachCodeUnitPropertyRange(CodeUnitFlags property, CodeUnitRangeVisitor visitor);
-    size_t getWhitespacesLength(TextRange textRange);
-    bool isSpace(TextRange textRange);
-
     bool codeUnitHasProperty(size_t index, CodeUnitFlags property) const { return (fCodeUnitProperties[index] & property) == property; }
 
     SkUnicode* getUnicode() { return fUnicode.get(); }
diff --git a/modules/skparagraph/src/Run.cpp b/modules/skparagraph/src/Run.cpp
index e5d8f5f..41dbd09 100644
--- a/modules/skparagraph/src/Run.cpp
+++ b/modules/skparagraph/src/Run.cpp
@@ -366,6 +366,26 @@
     return fOwner->codeUnitHasProperty(fTextRange.end, CodeUnitFlags::kGraphemeStart);
 }
 
+static bool is_ascii_7bit_space(int c) {  // returns true iff c is HT, LF, VT, FF, CR or SP
+    SkASSERT(c >= 0 && c <= 127);  // the caller must pass a 7-bit ASCII value
+
+    // Extracted from https://en.wikipedia.org/wiki/Whitespace_character
+    // The five control characters are tested through a bitmask; SP is compared directly.
+    enum WS {
+        kHT    = 9,
+        kLF    = 10,
+        kVT    = 11,
+        kFF    = 12,
+        kCR    = 13,
+        kSP    = 32,    // too big to use as a shift count for the 32-bit mask below
+    };
+#define M(shift)    (1 << (shift))
+    constexpr uint32_t kSpaceMask = M(kHT) | M(kLF) | M(kVT) | M(kFF) | M(kCR);  // one bit per control code
+    // Space (32) is checked explicitly, since it is too large to shift; `c <= 31` guards the M(c) shift.
+    return (c == kSP) || (c <= 31 && (kSpaceMask & M(c)));
+#undef M
+}
+
 Cluster::Cluster(ParagraphImpl* owner,
         RunIndex runIndex,
         size_t start,
@@ -383,9 +403,28 @@
         , fSpacing(0)
         , fHeight(height)
         , fHalfLetterSpacing(0.0) {
-    size_t len = fOwner->getWhitespacesLength(fTextRange);
-    fIsWhiteSpaces = (len == this->fTextRange.width());
-    fIsSpaces = fOwner->isSpace(fTextRange);
+    size_t whiteSpacesBreakLen = 0;
+    size_t intraWordBreakLen = 0;
+
+    const char* ch = text.begin();
+    if (text.end() - ch == 1 && *(unsigned char*)ch <= 0x7F) {
+        // I am not even sure it's worth it if we do not save a unicode call
+        if (is_ascii_7bit_space(*ch)) {
+            ++whiteSpacesBreakLen;
+        }
+    } else {
+        for (auto i = fTextRange.start; i < fTextRange.end; ++i) {
+            if (fOwner->codeUnitHasProperty(i, CodeUnitFlags::kPartOfWhiteSpaceBreak)) {
+                ++whiteSpacesBreakLen;
+            }
+            if (fOwner->codeUnitHasProperty(i, CodeUnitFlags::kPartOfIntraWordBreak)) {
+                ++intraWordBreakLen;
+            }
+        }
+    }
+
+    fIsWhiteSpaceBreak = whiteSpacesBreakLen == fTextRange.width();
+    fIsIntraWordBreak = intraWordBreakLen == fTextRange.width();
     fIsHardBreak = fOwner->codeUnitHasProperty(fTextRange.end, CodeUnitFlags::kHardLineBreakBefore);
 }
 
diff --git a/modules/skparagraph/src/Run.h b/modules/skparagraph/src/Run.h
index c6e36c5..e73d909 100644
--- a/modules/skparagraph/src/Run.h
+++ b/modules/skparagraph/src/Run.h
@@ -247,9 +247,10 @@
         fWidth += shift;
     }
 
-    bool isWhitespaces() const { return fIsWhiteSpaces; }
-    bool isSpaces() const { return fIsSpaces; }
+    bool isWhitespaceBreak() const { return fIsWhiteSpaceBreak; }
+    bool isIntraWordBreak() const { return fIsIntraWordBreak; }
     bool isHardBreak() const { return fIsHardBreak; }
+
     bool isSoftBreak() const;
     bool isGraphemeBreak() const;
     bool canBreakLineAfter() const { return isHardBreak() || isSoftBreak(); }
@@ -298,8 +299,9 @@
     SkScalar fSpacing;
     SkScalar fHeight;
     SkScalar fHalfLetterSpacing;
-    bool fIsWhiteSpaces;
-    bool fIsSpaces;
+
+    bool fIsWhiteSpaceBreak;
+    bool fIsIntraWordBreak;
     bool fIsHardBreak;
 };
 
diff --git a/modules/skparagraph/src/TextLine.cpp b/modules/skparagraph/src/TextLine.cpp
index 49d71c6..b4b3973 100644
--- a/modules/skparagraph/src/TextLine.cpp
+++ b/modules/skparagraph/src/TextLine.cpp
@@ -415,7 +415,7 @@
     bool whitespacePatch = false;
     this->iterateThroughClustersInGlyphsOrder(false, false,
         [&whitespacePatches, &textLen, &whitespacePatch](const Cluster* cluster, bool ghost) {
-            if (cluster->isWhitespaces()) {
+            if (cluster->isWhitespaceBreak()) {
                 if (!whitespacePatch) {
                     whitespacePatch = true;
                     ++whitespacePatches;
@@ -448,7 +448,7 @@
         }
 
         auto prevShift = shift;
-        if (cluster->isWhitespaces()) {
+        if (cluster->isWhitespaceBreak()) {
             if (!whitespacePatch) {
                 shift += step;
                 whitespacePatch = true;
diff --git a/modules/skparagraph/src/TextWrapper.cpp b/modules/skparagraph/src/TextWrapper.cpp
index e9c1142..3161324 100644
--- a/modules/skparagraph/src/TextWrapper.cpp
+++ b/modules/skparagraph/src/TextWrapper.cpp
@@ -36,7 +36,7 @@
         auto roundedWidth = littleRound(width);
         if (cluster->isHardBreak()) {
         } else if (roundedWidth > maxWidth) {
-            if (cluster->isWhitespaces()) {
+            if (cluster->isWhitespaceBreak()) {
                 // It's the end of the word
                 fClusters.extend(cluster);
                 fMinIntrinsicWidth = std::max(fMinIntrinsicWidth, getClustersTrimmedWidth());
@@ -64,7 +64,7 @@
             SkScalar nextWordLength = fClusters.width();
             SkScalar nextShortWordLength = nextWordLength;
             for (auto further = cluster; further != endOfClusters; ++further) {
-                if (further->isSoftBreak() || further->isHardBreak() || further->isWhitespaces()) {
+                if (further->isSoftBreak() || further->isHardBreak() || further->isWhitespaceBreak()) {
                     break;
                 }
                 if (further->run().isPlaceholder()) {
@@ -72,7 +72,7 @@
                   break;
                 }
 
-                if (further->isSpaces() && nextWordLength <= maxWidth) {
+                if (nextWordLength > 0 && nextWordLength <= maxWidth && further->isIntraWordBreak()) {
                     // The cluster is spaces but not the end of the word in a normal sense
                     nextNonBreakingSpace = further;
                     nextShortWordLength = nextWordLength;
@@ -179,7 +179,7 @@
     fEndLine.saveBreak();
     // Skip all space cluster at the end
     for (auto cluster = fEndLine.endCluster();
-         cluster >= fEndLine.startCluster() && cluster->isWhitespaces();
+         cluster >= fEndLine.startCluster() && cluster->isWhitespaceBreak();
          --cluster) {
         fEndLine.trim(cluster);
     }
@@ -195,7 +195,7 @@
             continue;
         }
         if (trailingSpaces) {
-            if (!cluster->isWhitespaces()) {
+            if (!cluster->isWhitespaceBreak()) {
                 width += cluster->trimmedWidth(cluster->endPos());
                 trailingSpaces = false;
             }
@@ -213,7 +213,7 @@
         // End of line is always end of cluster, but need to skip \n
         auto width = fEndLine.width();
         auto cluster = fEndLine.endCluster() + 1;
-        while (cluster < fEndLine.breakCluster() && cluster->isWhitespaces()) {
+        while (cluster < fEndLine.breakCluster() && cluster->isWhitespaceBreak()) {
             width += cluster->width();
             ++cluster;
         }
@@ -224,7 +224,7 @@
     // It's a soft line break so we need to move lineStart forward skipping all the spaces
     auto width = fEndLine.widthWithGhostSpaces();
     auto cluster = fEndLine.breakCluster() + 1;
-    while (cluster < endOfClusters && cluster->isWhitespaces()) {
+    while (cluster < endOfClusters && cluster->isWhitespaceBreak()) {
         width += cluster->width();
         ++cluster;
     }
@@ -391,7 +391,7 @@
                 softLineMaxIntrinsicWidth = 0;
                 fMinIntrinsicWidth = std::max(fMinIntrinsicWidth, lastWordLength);
                 lastWordLength = 0;
-            } else if (cluster->isWhitespaces()) {
+            } else if (cluster->isWhitespaceBreak()) {
                 // Whitespaces end the word
                 softLineMaxIntrinsicWidth += cluster->width();
                 fMinIntrinsicWidth = std::max(fMinIntrinsicWidth, lastWordLength);
diff --git a/modules/skshaper/src/SkUnicode.h b/modules/skshaper/src/SkUnicode.h
index 5ec3739..5a718e3 100644
--- a/modules/skshaper/src/SkUnicode.h
+++ b/modules/skshaper/src/SkUnicode.h
@@ -134,8 +134,19 @@
                (const char utf8[], int utf8Units, std::vector<Position>* results) = 0;
         virtual bool getGraphemes
                (const char utf8[], int utf8Units, std::vector<Position>* results) = 0;
-        virtual bool getWhitespaces
-               (const char utf8[], int utf8Units, std::vector<Position>* results) = 0;
+
+        template <typename Callback>  // Callback is invoked as callback(SkUnichar codepoint, start, end)
+        void forEachCodepoint(const char* utf8, int32_t utf8Units, Callback&& callback) {  // visits utf8[0, utf8Units)
+            const char* current = utf8;
+            const char* end = utf8 + utf8Units;
+            while (current < end) {  // one callback per decoded code point
+                auto before = current - utf8;  // offset of the first code unit of this code point
+                SkUnichar unichar = SkUTF::NextUTF8(&current, end);  // expected to move `current` forward - confirm in SkUTF
+                if (unichar < 0) unichar = 0xFFFD;  // a negative result is reported to the callback as U+FFFD
+                auto after = current - utf8;  // offset just past this code point (exclusive end)
+                callback(unichar, before, after);
+            }
+        }
 
         virtual void reorderVisual(const BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]) = 0;
 
diff --git a/modules/skshaper/src/SkUnicode_icu.cpp b/modules/skshaper/src/SkUnicode_icu.cpp
index c62f2ef..a753f92 100644
--- a/modules/skshaper/src/SkUnicode_icu.cpp
+++ b/modules/skshaper/src/SkUnicode_icu.cpp
@@ -381,26 +381,6 @@
         return true;
     }
 
-    static bool extractWhitespaces(const char utf8[],
-                                   int utf8Units,
-                                   std::vector<Position>* whitespaces) {
-
-        const char* start = utf8;
-        const char* end = utf8 + utf8Units;
-        const char* ch = start;
-        while (ch < end) {
-            auto index = ch - start;
-            auto unichar = utf8_next(&ch, end);
-            if (u_isWhitespace(unichar)) {
-                auto ending = ch - start;
-                for (auto k = index; k < ending; ++k) {
-                  whitespaces->emplace_back(k);
-                }
-            }
-        }
-        return true;
-    }
-
     static int utf8ToUtf16(const char* utf8, size_t utf8Units, std::unique_ptr<uint16_t[]>* utf16) {
         int utf16Units = SkUTF::UTF8ToUTF16(nullptr, 0, utf8, utf8Units);
         if (utf16Units < 0) {
@@ -516,11 +496,6 @@
         });
     }
 
-    bool getWhitespaces(const char utf8[], int utf8Units, std::vector<Position>* results) override {
-
-        return extractWhitespaces(utf8, utf8Units, results);
-    }
-
     void reorderVisual(const BidiLevel runLevels[],
                        int levelsCount,
                        int32_t logicalFromVisual[]) override {