Add nbsp info to the cache
Change-Id: I9d4cd6f3d7c2871c1279c3a7190187bbfee1b56b
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/383856
Commit-Queue: Julia Lavrova <jlavrova@google.com>
Reviewed-by: Ben Wagner <bungeman@google.com>
diff --git a/modules/skparagraph/samples/SampleParagraph.cpp b/modules/skparagraph/samples/SampleParagraph.cpp
index 134b486..7c63af4 100644
--- a/modules/skparagraph/samples/SampleParagraph.cpp
+++ b/modules/skparagraph/samples/SampleParagraph.cpp
@@ -2575,7 +2575,7 @@
cluster.textRange().start, cluster.textRange().end,
cluster.isSoftBreak() ? "soft" :
cluster.isHardBreak() ? "hard" :
- cluster.isWhitespaces() ? "spaces" : "");
+ cluster.isWhitespaceBreak() ? "spaces" : "");
}
auto lines = impl->lines();
diff --git a/modules/skparagraph/src/ParagraphImpl.cpp b/modules/skparagraph/src/ParagraphImpl.cpp
index 3a6b1da..785a87b 100644
--- a/modules/skparagraph/src/ParagraphImpl.cpp
+++ b/modules/skparagraph/src/ParagraphImpl.cpp
@@ -28,11 +28,6 @@
namespace {
-static inline SkUnichar nextUtf8Unit(const char** ptr, const char* end) {
- SkUnichar val = SkUTF::NextUTF8(ptr, end);
- return val < 0 ? 0xFFFD : val;
-}
-
SkScalar littleRound(SkScalar a) {
// This rounding is done to match Flutter tests. Must be removed..
auto val = std::fabs(a);
@@ -270,14 +265,20 @@
return false;
}
- // Get white spaces
- std::vector<SkUnicode::Position> whitespaces;
- if (!fUnicode->getWhitespaces(fText.c_str(), fText.size(), &whitespaces)) {
- return false;
- }
- for (auto whitespace : whitespaces) {
- fCodeUnitProperties[whitespace] |= CodeUnitFlags::kPartOfWhiteSpace;
- }
+ // Get all spaces
+ fUnicode->forEachCodepoint(fText.c_str(), fText.size(),
+ [this](SkUnichar unichar, int32_t start, int32_t end) {
+ if (fUnicode->isWhitespace(unichar)) {
+ for (auto i = start; i < end; ++i) {
+ fCodeUnitProperties[i] |= CodeUnitFlags::kPartOfWhiteSpaceBreak;
+ }
+ }
+ if (fUnicode->isSpace(unichar)) {
+ for (auto i = start; i < end; ++i) {
+ fCodeUnitProperties[i] |= CodeUnitFlags::kPartOfIntraWordBreak;
+ }
+ }
+ });
// Get line breaks
std::vector<SkUnicode::LineBreakBefore> lineBreaks;
@@ -376,7 +377,7 @@
// Process word spacing
if (currentStyle->fStyle.getWordSpacing() != 0) {
- if (cluster->isWhitespaces() && cluster->isSoftBreak()) {
+ if (cluster->isWhitespaceBreak() && cluster->isSoftBreak()) {
if (!soFarWhitespacesOnly) {
shift += run.addSpacesAtTheEnd(currentStyle->fStyle.getWordSpacing(), cluster);
}
@@ -387,7 +388,7 @@
shift += run.addSpacesEvenly(currentStyle->fStyle.getLetterSpacing(), cluster);
}
- if (soFarWhitespacesOnly && !cluster->isWhitespaces()) {
+ if (soFarWhitespacesOnly && !cluster->isWhitespaceBreak()) {
soFarWhitespacesOnly = false;
}
});
@@ -716,66 +717,6 @@
return { SkToU32(start), SkToU32(end) };
}
-void ParagraphImpl::forEachCodeUnitPropertyRange(CodeUnitFlags property, CodeUnitRangeVisitor visitor) {
-
- size_t first = 0;
- for (size_t i = 1; i < fText.size(); ++i) {
- auto properties = fCodeUnitProperties[i];
- if (properties & property) {
- visitor({first, i});
- first = i;
- }
-
- }
- visitor({first, fText.size()});
-}
-
-size_t ParagraphImpl::getWhitespacesLength(TextRange textRange) {
- size_t len = 0;
- for (auto i = textRange.start; i < textRange.end; ++i) {
- auto properties = fCodeUnitProperties[i];
- if (properties & CodeUnitFlags::kPartOfWhiteSpace) {
- ++len;
- }
- }
- return len;
-}
-
-static bool is_ascii_7bit_space(int c) {
- SkASSERT(c >= 0 && c <= 127);
-
- // Extracted from https://en.wikipedia.org/wiki/Whitespace_character
- //
- enum WS {
- kHT = 9,
- kLF = 10,
- kVT = 11,
- kFF = 12,
- kCR = 13,
- kSP = 32, // too big to use as shift
- };
-#define M(shift) (1 << (shift))
- constexpr uint32_t kSpaceMask = M(kHT) | M(kLF) | M(kVT) | M(kFF) | M(kCR);
- // we check for Space (32) explicitly, since it is too large to shift
- return (c == kSP) || (c <= 31 && (kSpaceMask & M(c)));
-#undef M
-}
-
-bool ParagraphImpl::isSpace(TextRange textRange) {
- auto text = ParagraphImpl::text(textRange);
- const char* ch = text.begin();
- if (text.end() - ch == 1 && *(unsigned char*)ch <= 0x7F) {
- return is_ascii_7bit_space(*ch);
- }
- while (ch != text.end()) {
- SkUnichar unicode = nextUtf8Unit(&ch, text.end());
- if (!fUnicode->isSpace(unicode)) {
- return false;
- }
- }
- return true;
-}
-
void ParagraphImpl::getLineMetrics(std::vector<LineMetrics>& metrics) {
metrics.clear();
for (auto& line : fLines) {
diff --git a/modules/skparagraph/src/ParagraphImpl.h b/modules/skparagraph/src/ParagraphImpl.h
index 84fa98e..555b50e 100644
--- a/modules/skparagraph/src/ParagraphImpl.h
+++ b/modules/skparagraph/src/ParagraphImpl.h
@@ -37,11 +37,12 @@
namespace textlayout {
enum CodeUnitFlags {
- kNoCodeUnitFlag = 0x0,
- kPartOfWhiteSpace = 0x1,
- kGraphemeStart = 0x2,
- kSoftLineBreakBefore = 0x4,
- kHardLineBreakBefore = 0x8,
+ kNoCodeUnitFlag = 0x00,
+ kPartOfWhiteSpaceBreak = 0x01,
+ kGraphemeStart = 0x02,
+ kSoftLineBreakBefore = 0x04,
+ kHardLineBreakBefore = 0x08,
+ kPartOfIntraWordBreak = 0x10,
};
} // namespace textlayout
} // namespace skia
@@ -217,11 +218,6 @@
}
}
- using CodeUnitRangeVisitor = std::function<bool(TextRange textRange)>;
- void forEachCodeUnitPropertyRange(CodeUnitFlags property, CodeUnitRangeVisitor visitor);
- size_t getWhitespacesLength(TextRange textRange);
- bool isSpace(TextRange textRange);
-
bool codeUnitHasProperty(size_t index, CodeUnitFlags property) const { return (fCodeUnitProperties[index] & property) == property; }
SkUnicode* getUnicode() { return fUnicode.get(); }
diff --git a/modules/skparagraph/src/Run.cpp b/modules/skparagraph/src/Run.cpp
index e5d8f5f..41dbd09 100644
--- a/modules/skparagraph/src/Run.cpp
+++ b/modules/skparagraph/src/Run.cpp
@@ -366,6 +366,26 @@
return fOwner->codeUnitHasProperty(fTextRange.end, CodeUnitFlags::kGraphemeStart);
}
+static bool is_ascii_7bit_space(int c) {
+ SkASSERT(c >= 0 && c <= 127);
+
+ // Extracted from https://en.wikipedia.org/wiki/Whitespace_character
+ //
+ enum WS {
+ kHT = 9,
+ kLF = 10,
+ kVT = 11,
+ kFF = 12,
+ kCR = 13,
+ kSP = 32, // too big to use as shift
+ };
+#define M(shift) (1 << (shift))
+ constexpr uint32_t kSpaceMask = M(kHT) | M(kLF) | M(kVT) | M(kFF) | M(kCR);
+ // we check for Space (32) explicitly, since it is too large to shift
+ return (c == kSP) || (c <= 31 && (kSpaceMask & M(c)));
+#undef M
+}
+
Cluster::Cluster(ParagraphImpl* owner,
RunIndex runIndex,
size_t start,
@@ -383,9 +403,28 @@
, fSpacing(0)
, fHeight(height)
, fHalfLetterSpacing(0.0) {
- size_t len = fOwner->getWhitespacesLength(fTextRange);
- fIsWhiteSpaces = (len == this->fTextRange.width());
- fIsSpaces = fOwner->isSpace(fTextRange);
+ size_t whiteSpacesBreakLen = 0;
+ size_t intraWordBreakLen = 0;
+
+ const char* ch = text.begin();
+ if (text.end() - ch == 1 && *(unsigned char*)ch <= 0x7F) {
+ // I am not even sure it's worth it if we do not save a unicode call
+ if (is_ascii_7bit_space(*ch)) {
+ ++whiteSpacesBreakLen;
+ }
+ } else {
+ for (auto i = fTextRange.start; i < fTextRange.end; ++i) {
+ if (fOwner->codeUnitHasProperty(i, CodeUnitFlags::kPartOfWhiteSpaceBreak)) {
+ ++whiteSpacesBreakLen;
+ }
+ if (fOwner->codeUnitHasProperty(i, CodeUnitFlags::kPartOfIntraWordBreak)) {
+ ++intraWordBreakLen;
+ }
+ }
+ }
+
+ fIsWhiteSpaceBreak = whiteSpacesBreakLen == fTextRange.width();
+ fIsIntraWordBreak = intraWordBreakLen == fTextRange.width();
fIsHardBreak = fOwner->codeUnitHasProperty(fTextRange.end, CodeUnitFlags::kHardLineBreakBefore);
}
diff --git a/modules/skparagraph/src/Run.h b/modules/skparagraph/src/Run.h
index c6e36c5..e73d909 100644
--- a/modules/skparagraph/src/Run.h
+++ b/modules/skparagraph/src/Run.h
@@ -247,9 +247,10 @@
fWidth += shift;
}
- bool isWhitespaces() const { return fIsWhiteSpaces; }
- bool isSpaces() const { return fIsSpaces; }
+ bool isWhitespaceBreak() const { return fIsWhiteSpaceBreak; }
+ bool isIntraWordBreak() const { return fIsIntraWordBreak; }
bool isHardBreak() const { return fIsHardBreak; }
+
bool isSoftBreak() const;
bool isGraphemeBreak() const;
bool canBreakLineAfter() const { return isHardBreak() || isSoftBreak(); }
@@ -298,8 +299,9 @@
SkScalar fSpacing;
SkScalar fHeight;
SkScalar fHalfLetterSpacing;
- bool fIsWhiteSpaces;
- bool fIsSpaces;
+
+ bool fIsWhiteSpaceBreak;
+ bool fIsIntraWordBreak;
bool fIsHardBreak;
};
diff --git a/modules/skparagraph/src/TextLine.cpp b/modules/skparagraph/src/TextLine.cpp
index 49d71c6..b4b3973 100644
--- a/modules/skparagraph/src/TextLine.cpp
+++ b/modules/skparagraph/src/TextLine.cpp
@@ -415,7 +415,7 @@
bool whitespacePatch = false;
this->iterateThroughClustersInGlyphsOrder(false, false,
[&whitespacePatches, &textLen, &whitespacePatch](const Cluster* cluster, bool ghost) {
- if (cluster->isWhitespaces()) {
+ if (cluster->isWhitespaceBreak()) {
if (!whitespacePatch) {
whitespacePatch = true;
++whitespacePatches;
@@ -448,7 +448,7 @@
}
auto prevShift = shift;
- if (cluster->isWhitespaces()) {
+ if (cluster->isWhitespaceBreak()) {
if (!whitespacePatch) {
shift += step;
whitespacePatch = true;
diff --git a/modules/skparagraph/src/TextWrapper.cpp b/modules/skparagraph/src/TextWrapper.cpp
index e9c1142..3161324 100644
--- a/modules/skparagraph/src/TextWrapper.cpp
+++ b/modules/skparagraph/src/TextWrapper.cpp
@@ -36,7 +36,7 @@
auto roundedWidth = littleRound(width);
if (cluster->isHardBreak()) {
} else if (roundedWidth > maxWidth) {
- if (cluster->isWhitespaces()) {
+ if (cluster->isWhitespaceBreak()) {
// It's the end of the word
fClusters.extend(cluster);
fMinIntrinsicWidth = std::max(fMinIntrinsicWidth, getClustersTrimmedWidth());
@@ -64,7 +64,7 @@
SkScalar nextWordLength = fClusters.width();
SkScalar nextShortWordLength = nextWordLength;
for (auto further = cluster; further != endOfClusters; ++further) {
- if (further->isSoftBreak() || further->isHardBreak() || further->isWhitespaces()) {
+ if (further->isSoftBreak() || further->isHardBreak() || further->isWhitespaceBreak()) {
break;
}
if (further->run().isPlaceholder()) {
@@ -72,7 +72,7 @@
break;
}
- if (further->isSpaces() && nextWordLength <= maxWidth) {
+ if (nextWordLength > 0 && nextWordLength <= maxWidth && further->isIntraWordBreak()) {
// The cluster is spaces but not the end of the word in a normal sense
nextNonBreakingSpace = further;
nextShortWordLength = nextWordLength;
@@ -179,7 +179,7 @@
fEndLine.saveBreak();
// Skip all space cluster at the end
for (auto cluster = fEndLine.endCluster();
- cluster >= fEndLine.startCluster() && cluster->isWhitespaces();
+ cluster >= fEndLine.startCluster() && cluster->isWhitespaceBreak();
--cluster) {
fEndLine.trim(cluster);
}
@@ -195,7 +195,7 @@
continue;
}
if (trailingSpaces) {
- if (!cluster->isWhitespaces()) {
+ if (!cluster->isWhitespaceBreak()) {
width += cluster->trimmedWidth(cluster->endPos());
trailingSpaces = false;
}
@@ -213,7 +213,7 @@
// End of line is always end of cluster, but need to skip \n
auto width = fEndLine.width();
auto cluster = fEndLine.endCluster() + 1;
- while (cluster < fEndLine.breakCluster() && cluster->isWhitespaces()) {
+ while (cluster < fEndLine.breakCluster() && cluster->isWhitespaceBreak()) {
width += cluster->width();
++cluster;
}
@@ -224,7 +224,7 @@
// It's a soft line break so we need to move lineStart forward skipping all the spaces
auto width = fEndLine.widthWithGhostSpaces();
auto cluster = fEndLine.breakCluster() + 1;
- while (cluster < endOfClusters && cluster->isWhitespaces()) {
+ while (cluster < endOfClusters && cluster->isWhitespaceBreak()) {
width += cluster->width();
++cluster;
}
@@ -391,7 +391,7 @@
softLineMaxIntrinsicWidth = 0;
fMinIntrinsicWidth = std::max(fMinIntrinsicWidth, lastWordLength);
lastWordLength = 0;
- } else if (cluster->isWhitespaces()) {
+ } else if (cluster->isWhitespaceBreak()) {
// Whitespaces end the word
softLineMaxIntrinsicWidth += cluster->width();
fMinIntrinsicWidth = std::max(fMinIntrinsicWidth, lastWordLength);
diff --git a/modules/skshaper/src/SkUnicode.h b/modules/skshaper/src/SkUnicode.h
index 5ec3739..5a718e3 100644
--- a/modules/skshaper/src/SkUnicode.h
+++ b/modules/skshaper/src/SkUnicode.h
@@ -134,8 +134,19 @@
(const char utf8[], int utf8Units, std::vector<Position>* results) = 0;
virtual bool getGraphemes
(const char utf8[], int utf8Units, std::vector<Position>* results) = 0;
- virtual bool getWhitespaces
- (const char utf8[], int utf8Units, std::vector<Position>* results) = 0;
+
+ template <typename Callback>
+ void forEachCodepoint(const char* utf8, int32_t utf8Units, Callback&& callback) {
+ const char* current = utf8;
+ const char* end = utf8 + utf8Units;
+ while (current < end) {
+ auto before = current - utf8;
+ SkUnichar unichar = SkUTF::NextUTF8(&current, end);
+ if (unichar < 0) unichar = 0xFFFD;
+ auto after = current - utf8;
+ callback(unichar, before, after);
+ }
+ }
virtual void reorderVisual(const BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]) = 0;
diff --git a/modules/skshaper/src/SkUnicode_icu.cpp b/modules/skshaper/src/SkUnicode_icu.cpp
index c62f2ef..a753f92 100644
--- a/modules/skshaper/src/SkUnicode_icu.cpp
+++ b/modules/skshaper/src/SkUnicode_icu.cpp
@@ -381,26 +381,6 @@
return true;
}
- static bool extractWhitespaces(const char utf8[],
- int utf8Units,
- std::vector<Position>* whitespaces) {
-
- const char* start = utf8;
- const char* end = utf8 + utf8Units;
- const char* ch = start;
- while (ch < end) {
- auto index = ch - start;
- auto unichar = utf8_next(&ch, end);
- if (u_isWhitespace(unichar)) {
- auto ending = ch - start;
- for (auto k = index; k < ending; ++k) {
- whitespaces->emplace_back(k);
- }
- }
- }
- return true;
- }
-
static int utf8ToUtf16(const char* utf8, size_t utf8Units, std::unique_ptr<uint16_t[]>* utf16) {
int utf16Units = SkUTF::UTF8ToUTF16(nullptr, 0, utf8, utf8Units);
if (utf16Units < 0) {
@@ -516,11 +496,6 @@
});
}
- bool getWhitespaces(const char utf8[], int utf8Units, std::vector<Position>* results) override {
-
- return extractWhitespaces(utf8, utf8Units, results);
- }
-
void reorderVisual(const BidiLevel runLevels[],
int levelsCount,
int32_t logicalFromVisual[]) override {