| /* |
| * Copyright (C) 2015 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #define VERBOSE_DEBUG 0 |
| |
| #define LOG_TAG "Minikin" |
| |
| #include <limits> |
| |
| #include <log/log.h> |
| |
| #include "LayoutUtils.h" |
| #include <minikin/Layout.h> |
| #include <minikin/LineBreaker.h> |
| |
| using std::vector; |
| |
| namespace minikin { |
| |
| const int CHAR_TAB = 0x0009; |
| |
| // Large scores in a hierarchy; we prefer desperate breaks to an overfull line. All these |
| // constants are larger than any reasonable actual width score. |
| const float SCORE_INFTY = std::numeric_limits<float>::max(); |
| const float SCORE_OVERFULL = 1e12f; |
| const float SCORE_DESPERATE = 1e10f; |
| |
| // Multiplier for hyphen penalty on last line. |
| const float LAST_LINE_PENALTY_MULTIPLIER = 4.0f; |
| // Penalty assigned to each line break (to try to minimize number of lines) |
| // TODO: when we implement full justification (so spaces can shrink and stretch), this is |
| // probably not the most appropriate method. |
| const float LINE_PENALTY_MULTIPLIER = 2.0f; |
| |
| // Penalty assigned to shrinking the whitepsace. |
| const float SHRINK_PENALTY_MULTIPLIER = 4.0f; |
| |
| // Very long words trigger O(n^2) behavior in hyphenation, so we disable hyphenation for |
| // unreasonably long words. This is somewhat of a heuristic because extremely long words |
| // are possible in some languages. This does mean that very long real words can get |
| // broken by desperate breaks, with no hyphens. |
| const size_t LONGEST_HYPHENATED_WORD = 45; |
| |
| // When the text buffer is within this limit, capacity of vectors is retained at finish(), |
| // to avoid allocation. |
| const size_t MAX_TEXT_BUF_RETAIN = 32678; |
| |
| // Maximum amount that spaces can shrink, in justified text. |
| const float SHRINKABILITY = 1.0 / 3.0; |
| |
| void LineBreaker::setLocales(const char* locales, const std::vector<Hyphenator*>& hyphenators) { |
| bool goodLocaleFound = false; |
| const ssize_t numLocales = hyphenators.size(); |
| // For now, we ignore all locales except the first valid one. |
| // TODO: Support selecting the locale based on the script of the text. |
| const char* localeStart = locales; |
| for (ssize_t i = 0; i < numLocales - 1; i++) { // Loop over all locales, except the last one. |
| const char* localeEnd = strchr(localeStart, ','); |
| const size_t localeNameLength = localeEnd - localeStart; |
| char localeName[localeNameLength + 1]; |
| strncpy(localeName, localeStart, localeNameLength); |
| localeName[localeNameLength] = '\0'; |
| mLocale = icu::Locale::createFromName(localeName); |
| goodLocaleFound = !mLocale.isBogus(); |
| if (goodLocaleFound) { |
| mHyphenator = hyphenators[i]; |
| break; |
| } else { |
| localeStart = localeEnd + 1; |
| } |
| } |
| if (!goodLocaleFound) { // Try the last locale. |
| mLocale = icu::Locale::createFromName(localeStart); |
| if (mLocale.isBogus()) { |
| // No good locale. |
| mLocale = icu::Locale::getRoot(); |
| mHyphenator = nullptr; |
| } else { |
| mHyphenator = numLocales == 0 ? nullptr : hyphenators[numLocales - 1]; |
| } |
| } |
| mWordBreaker.setLocale(mLocale); |
| } |
| |
| void LineBreaker::setText() { |
| mWordBreaker.setText(mTextBuf.data(), mTextBuf.size()); |
| |
| // handle initial break here because addStyleRun may never be called |
| mWordBreaker.next(); |
| mCandidates.clear(); |
| Candidate cand = {0, 0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0, HyphenationType::DONT_BREAK}; |
| mCandidates.push_back(cand); |
| |
| // reset greedy breaker state |
| mBreaks.clear(); |
| mWidths.clear(); |
| mFlags.clear(); |
| mLastBreak = 0; |
| mBestBreak = 0; |
| mBestScore = SCORE_INFTY; |
| mPreBreak = 0; |
| mLastHyphenation = HyphenEdit::NO_EDIT; |
| mFirstTabIndex = INT_MAX; |
| mSpaceCount = 0; |
| } |
| |
| void LineBreaker::setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth) { |
| mLineWidths.setWidths(firstWidth, firstWidthLineCount, restWidth); |
| } |
| |
| |
| void LineBreaker::setIndents(const std::vector<float>& indents) { |
| mLineWidths.setIndents(indents); |
| } |
| |
| // This function determines whether a character is a space that disappears at end of line. |
| // It is the Unicode set: [[:General_Category=Space_Separator:]-[:Line_Break=Glue:]], |
| // plus '\n'. |
| // Note: all such characters are in the BMP, so it's ok to use code units for this. |
| static bool isLineEndSpace(uint16_t c) { |
| return c == '\n' || c == ' ' || c == 0x1680 || (0x2000 <= c && c <= 0x200A && c != 0x2007) || |
| c == 0x205F || c == 0x3000; |
| } |
| |
| // Ordinarily, this method measures the text in the range given. However, when paint |
| // is nullptr, it assumes the widths have already been calculated and stored in the |
| // width buffer. |
| // This method finds the candidate word breaks (using the ICU break iterator) and sends them |
| // to addCandidate. |
| float LineBreaker::addStyleRun(MinikinPaint* paint, const std::shared_ptr<FontCollection>& typeface, |
| FontStyle style, size_t start, size_t end, bool isRtl) { |
| float width = 0.0f; |
| int bidiFlags = isRtl ? kBidi_Force_RTL : kBidi_Force_LTR; |
| |
| float hyphenPenalty = 0.0; |
| if (paint != nullptr) { |
| width = Layout::measureText(mTextBuf.data(), start, end - start, mTextBuf.size(), bidiFlags, |
| style, *paint, typeface, mCharWidths.data() + start); |
| |
| // a heuristic that seems to perform well |
| hyphenPenalty = 0.5 * paint->size * paint->scaleX * mLineWidths.getLineWidth(0); |
| if (mHyphenationFrequency == kHyphenationFrequency_Normal) { |
| hyphenPenalty *= 4.0; // TODO: Replace with a better value after some testing |
| } |
| |
| if (mJustified) { |
| // Make hyphenation more aggressive for fully justified text (so that "normal" in |
| // justified mode is the same as "full" in ragged-right). |
| hyphenPenalty *= 0.25; |
| } else { |
| // Line penalty is zero for justified text. |
| mLinePenalty = std::max(mLinePenalty, hyphenPenalty * LINE_PENALTY_MULTIPLIER); |
| } |
| } |
| |
| size_t current = (size_t)mWordBreaker.current(); |
| size_t afterWord = start; |
| size_t lastBreak = start; |
| ParaWidth lastBreakWidth = mWidth; |
| ParaWidth postBreak = mWidth; |
| size_t postSpaceCount = mSpaceCount; |
| for (size_t i = start; i < end; i++) { |
| uint16_t c = mTextBuf[i]; |
| if (c == CHAR_TAB) { |
| mWidth = mPreBreak + mTabStops.nextTab(mWidth - mPreBreak); |
| if (mFirstTabIndex == INT_MAX) { |
| mFirstTabIndex = (int)i; |
| } |
| // fall back to greedy; other modes don't know how to deal with tabs |
| mStrategy = kBreakStrategy_Greedy; |
| } else { |
| if (isWordSpace(c)) mSpaceCount += 1; |
| mWidth += mCharWidths[i]; |
| if (!isLineEndSpace(c)) { |
| postBreak = mWidth; |
| postSpaceCount = mSpaceCount; |
| afterWord = i + 1; |
| } |
| } |
| if (i + 1 == current) { |
| size_t wordStart = mWordBreaker.wordStart(); |
| size_t wordEnd = mWordBreaker.wordEnd(); |
| if (paint != nullptr && mHyphenator != nullptr && |
| mHyphenationFrequency != kHyphenationFrequency_None && |
| wordStart >= start && wordEnd > wordStart && |
| wordEnd - wordStart <= LONGEST_HYPHENATED_WORD) { |
| mHyphenator->hyphenate(&mHyphBuf, |
| &mTextBuf[wordStart], |
| wordEnd - wordStart, |
| mLocale); |
| #if VERBOSE_DEBUG |
| std::string hyphenatedString; |
| for (size_t j = wordStart; j < wordEnd; j++) { |
| if (mHyphBuf[j - wordStart] == HyphenationType::BREAK_AND_INSERT_HYPHEN) { |
| hyphenatedString.push_back('-'); |
| } |
| // Note: only works with ASCII, should do UTF-8 conversion here |
| hyphenatedString.push_back(buffer()[j]); |
| } |
| ALOGD("hyphenated string: %s", hyphenatedString.c_str()); |
| #endif |
| |
| // measure hyphenated substrings |
| for (size_t j = wordStart; j < wordEnd; j++) { |
| HyphenationType hyph = mHyphBuf[j - wordStart]; |
| if (hyph != HyphenationType::DONT_BREAK) { |
| paint->hyphenEdit = HyphenEdit::editForThisLine(hyph); |
| const float firstPartWidth = Layout::measureText(mTextBuf.data(), |
| lastBreak, j - lastBreak, mTextBuf.size(), bidiFlags, style, |
| *paint, typeface, nullptr); |
| ParaWidth hyphPostBreak = lastBreakWidth + firstPartWidth; |
| |
| paint->hyphenEdit = HyphenEdit::editForNextLine(hyph); |
| const float secondPartWidth = Layout::measureText(mTextBuf.data(), j, |
| afterWord - j, mTextBuf.size(), bidiFlags, style, *paint, |
| typeface, nullptr); |
| ParaWidth hyphPreBreak = postBreak - secondPartWidth; |
| |
| addWordBreak(j, hyphPreBreak, hyphPostBreak, postSpaceCount, postSpaceCount, |
| hyphenPenalty, hyph); |
| |
| paint->hyphenEdit = HyphenEdit::NO_EDIT; |
| } |
| } |
| } |
| |
| // Skip break for zero-width characters inside replacement span |
| if (paint != nullptr || current == end || mCharWidths[current] > 0) { |
| float penalty = hyphenPenalty * mWordBreaker.breakBadness(); |
| addWordBreak(current, mWidth, postBreak, mSpaceCount, postSpaceCount, penalty, |
| HyphenationType::DONT_BREAK); |
| } |
| lastBreak = current; |
| lastBreakWidth = mWidth; |
| current = (size_t)mWordBreaker.next(); |
| } |
| } |
| |
| return width; |
| } |
| |
| // add a word break (possibly for a hyphenated fragment), and add desperate breaks if |
| // needed (ie when word exceeds current line width) |
| void LineBreaker::addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak, |
| size_t preSpaceCount, size_t postSpaceCount, float penalty, HyphenationType hyph) { |
| Candidate cand; |
| ParaWidth width = mCandidates.back().preBreak; |
| if (postBreak - width > currentLineWidth()) { |
| // Add desperate breaks. |
| // Note: these breaks are based on the shaping of the (non-broken) original text; they |
| // are imprecise especially in the presence of kerning, ligatures, and Arabic shaping. |
| size_t i = mCandidates.back().offset; |
| width += mCharWidths[i++]; |
| for (; i < offset; i++) { |
| float w = mCharWidths[i]; |
| if (w > 0) { |
| cand.offset = i; |
| cand.preBreak = width; |
| cand.postBreak = width; |
| // postSpaceCount doesn't include trailing spaces |
| cand.preSpaceCount = postSpaceCount; |
| cand.postSpaceCount = postSpaceCount; |
| cand.penalty = SCORE_DESPERATE; |
| cand.hyphenType = HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN; |
| #if VERBOSE_DEBUG |
| ALOGD("desperate cand: %zd %g:%g", |
| mCandidates.size(), cand.postBreak, cand.preBreak); |
| #endif |
| addCandidate(cand); |
| width += w; |
| } |
| } |
| } |
| |
| cand.offset = offset; |
| cand.preBreak = preBreak; |
| cand.postBreak = postBreak; |
| cand.penalty = penalty; |
| cand.preSpaceCount = preSpaceCount; |
| cand.postSpaceCount = postSpaceCount; |
| cand.hyphenType = hyph; |
| #if VERBOSE_DEBUG |
| ALOGD("cand: %zd %g:%g", mCandidates.size(), cand.postBreak, cand.preBreak); |
| #endif |
| addCandidate(cand); |
| } |
| |
| // Helper method for addCandidate() |
| void LineBreaker::pushGreedyBreak() { |
| const Candidate& bestCandidate = mCandidates[mBestBreak]; |
| pushBreak(bestCandidate.offset, bestCandidate.postBreak - mPreBreak, |
| mLastHyphenation | HyphenEdit::editForThisLine(bestCandidate.hyphenType)); |
| mBestScore = SCORE_INFTY; |
| #if VERBOSE_DEBUG |
| ALOGD("break: %d %g", mBreaks.back(), mWidths.back()); |
| #endif |
| mLastBreak = mBestBreak; |
| mPreBreak = bestCandidate.preBreak; |
| mLastHyphenation = HyphenEdit::editForNextLine(bestCandidate.hyphenType); |
| } |
| |
| // TODO performance: could avoid populating mCandidates if greedy only |
| void LineBreaker::addCandidate(Candidate cand) { |
| const size_t candIndex = mCandidates.size(); |
| mCandidates.push_back(cand); |
| |
| // mLastBreak is the index of the last line break we decided to do in mCandidates, |
| // and mPreBreak is its preBreak value. mBestBreak is the index of the best line breaking candidate |
| // we have found since then, and mBestScore is its penalty. |
| if (cand.postBreak - mPreBreak > currentLineWidth()) { |
| // This break would create an overfull line, pick the best break and break there (greedy) |
| if (mBestBreak == mLastBreak) { |
| // No good break has been found since last break. Break here. |
| mBestBreak = candIndex; |
| } |
| pushGreedyBreak(); |
| } |
| |
| while (mLastBreak != candIndex && cand.postBreak - mPreBreak > currentLineWidth()) { |
| // We should rarely come here. But if we are here, we have broken the line, but the |
| // remaining part still doesn't fit. We now need to break at the second best place after the |
| // last break, but we have not kept that information, so we need to go back and find it. |
| // |
| // In some really rare cases, postBreak - preBreak of a candidate itself may be over the |
| // current line width. We protect ourselves against an infinite loop in that case by |
| // checking that we have not broken the line at this candidate already. |
| for (size_t i = mLastBreak + 1; i < candIndex; i++) { |
| const float penalty = mCandidates[i].penalty; |
| if (penalty <= mBestScore) { |
| mBestBreak = i; |
| mBestScore = penalty; |
| } |
| } |
| if (mBestBreak == mLastBreak) { |
| // We didn't find anything good. Break here. |
| mBestBreak = candIndex; |
| } |
| pushGreedyBreak(); |
| } |
| |
| if (cand.penalty <= mBestScore) { |
| mBestBreak = candIndex; |
| mBestScore = cand.penalty; |
| } |
| } |
| |
| void LineBreaker::pushBreak(int offset, float width, uint8_t hyphenEdit) { |
| mBreaks.push_back(offset); |
| mWidths.push_back(width); |
| int flags = (mFirstTabIndex < mBreaks.back()) << kTab_Shift; |
| flags |= hyphenEdit; |
| mFlags.push_back(flags); |
| mFirstTabIndex = INT_MAX; |
| } |
| |
| void LineBreaker::addReplacement(size_t start, size_t end, float width) { |
| mCharWidths[start] = width; |
| std::fill(&mCharWidths[start + 1], &mCharWidths[end], 0.0f); |
| addStyleRun(nullptr, nullptr, FontStyle(), start, end, false); |
| } |
| |
| // Get the width of a space. May return 0 if there are no spaces. |
| // Note: if there are multiple different widths for spaces (for example, because of mixing of |
| // fonts), it's only guaranteed to pick one. |
| float LineBreaker::getSpaceWidth() const { |
| for (size_t i = 0; i < mTextBuf.size(); i++) { |
| if (isWordSpace(mTextBuf[i])) { |
| return mCharWidths[i]; |
| } |
| } |
| return 0.0f; |
| } |
| |
| float LineBreaker::currentLineWidth() const { |
| return mLineWidths.getLineWidth(mBreaks.size()); |
| } |
| |
| void LineBreaker::computeBreaksGreedy() { |
| // All breaks but the last have been added in addCandidate already. |
| size_t nCand = mCandidates.size(); |
| if (nCand == 1 || mLastBreak != nCand - 1) { |
| pushBreak(mCandidates[nCand - 1].offset, mCandidates[nCand - 1].postBreak - mPreBreak, |
| mLastHyphenation); |
| // don't need to update mBestScore, because we're done |
| #if VERBOSE_DEBUG |
| ALOGD("final break: %d %g", mBreaks.back(), mWidths.back()); |
| #endif |
| } |
| } |
| |
| // Follow "prev" links in mCandidates array, and copy to result arrays. |
| void LineBreaker::finishBreaksOptimal() { |
| // clear existing greedy break result |
| mBreaks.clear(); |
| mWidths.clear(); |
| mFlags.clear(); |
| size_t nCand = mCandidates.size(); |
| size_t prev; |
| for (size_t i = nCand - 1; i > 0; i = prev) { |
| prev = mCandidates[i].prev; |
| mBreaks.push_back(mCandidates[i].offset); |
| mWidths.push_back(mCandidates[i].postBreak - mCandidates[prev].preBreak); |
| int flags = HyphenEdit::editForThisLine(mCandidates[i].hyphenType); |
| if (prev > 0) { |
| flags |= HyphenEdit::editForNextLine(mCandidates[prev].hyphenType); |
| } |
| mFlags.push_back(flags); |
| } |
| std::reverse(mBreaks.begin(), mBreaks.end()); |
| std::reverse(mWidths.begin(), mWidths.end()); |
| std::reverse(mFlags.begin(), mFlags.end()); |
| } |
| |
| void LineBreaker::computeBreaksOptimal(bool isRectangle) { |
| size_t active = 0; |
| size_t nCand = mCandidates.size(); |
| float width = mLineWidths.getLineWidth(0); |
| float maxShrink = mJustified ? SHRINKABILITY * getSpaceWidth() : 0.0f; |
| |
| // "i" iterates through candidates for the end of the line. |
| for (size_t i = 1; i < nCand; i++) { |
| bool atEnd = i == nCand - 1; |
| float best = SCORE_INFTY; |
| size_t bestPrev = 0; |
| size_t lineNumberLast = 0; |
| |
| if (!isRectangle) { |
| size_t lineNumberLast = mCandidates[active].lineNumber; |
| width = mLineWidths.getLineWidth(lineNumberLast); |
| } |
| ParaWidth leftEdge = mCandidates[i].postBreak - width; |
| float bestHope = 0; |
| |
| // "j" iterates through candidates for the beginning of the line. |
| for (size_t j = active; j < i; j++) { |
| if (!isRectangle) { |
| size_t lineNumber = mCandidates[j].lineNumber; |
| if (lineNumber != lineNumberLast) { |
| float widthNew = mLineWidths.getLineWidth(lineNumber); |
| if (widthNew != width) { |
| leftEdge = mCandidates[i].postBreak - width; |
| bestHope = 0; |
| width = widthNew; |
| } |
| lineNumberLast = lineNumber; |
| } |
| } |
| float jScore = mCandidates[j].score; |
| if (jScore + bestHope >= best) continue; |
| float delta = mCandidates[j].preBreak - leftEdge; |
| |
| // compute width score for line |
| |
| // Note: the "bestHope" optimization makes the assumption that, when delta is |
| // non-negative, widthScore will increase monotonically as successive candidate |
| // breaks are considered. |
| float widthScore = 0.0f; |
| float additionalPenalty = 0.0f; |
| if ((atEnd || !mJustified) && delta < 0) { |
| widthScore = SCORE_OVERFULL; |
| } else if (atEnd && mStrategy != kBreakStrategy_Balanced) { |
| // increase penalty for hyphen on last line |
| additionalPenalty = LAST_LINE_PENALTY_MULTIPLIER * mCandidates[j].penalty; |
| } else { |
| widthScore = delta * delta; |
| if (delta < 0) { |
| if (-delta < maxShrink * |
| (mCandidates[i].postSpaceCount - mCandidates[j].preSpaceCount)) { |
| widthScore *= SHRINK_PENALTY_MULTIPLIER; |
| } else { |
| widthScore = SCORE_OVERFULL; |
| } |
| } |
| } |
| |
| if (delta < 0) { |
| active = j + 1; |
| } else { |
| bestHope = widthScore; |
| } |
| |
| float score = jScore + widthScore + additionalPenalty; |
| if (score <= best) { |
| best = score; |
| bestPrev = j; |
| } |
| } |
| mCandidates[i].score = best + mCandidates[i].penalty + mLinePenalty; |
| mCandidates[i].prev = bestPrev; |
| mCandidates[i].lineNumber = mCandidates[bestPrev].lineNumber + 1; |
| #if VERBOSE_DEBUG |
| ALOGD("break %zd: score=%g, prev=%zd", i, mCandidates[i].score, mCandidates[i].prev); |
| #endif |
| } |
| finishBreaksOptimal(); |
| } |
| |
| size_t LineBreaker::computeBreaks() { |
| if (mStrategy == kBreakStrategy_Greedy) { |
| computeBreaksGreedy(); |
| } else { |
| computeBreaksOptimal(mLineWidths.isConstant()); |
| } |
| return mBreaks.size(); |
| } |
| |
| void LineBreaker::finish() { |
| mWordBreaker.finish(); |
| mWidth = 0; |
| mLineWidths.clear(); |
| mCandidates.clear(); |
| mBreaks.clear(); |
| mWidths.clear(); |
| mFlags.clear(); |
| if (mTextBuf.size() > MAX_TEXT_BUF_RETAIN) { |
| mTextBuf.clear(); |
| mTextBuf.shrink_to_fit(); |
| mCharWidths.clear(); |
| mCharWidths.shrink_to_fit(); |
| mHyphBuf.clear(); |
| mHyphBuf.shrink_to_fit(); |
| mCandidates.shrink_to_fit(); |
| mBreaks.shrink_to_fit(); |
| mWidths.shrink_to_fit(); |
| mFlags.shrink_to_fit(); |
| } |
| mStrategy = kBreakStrategy_Greedy; |
| mHyphenationFrequency = kHyphenationFrequency_Normal; |
| mLinePenalty = 0.0f; |
| mJustified = false; |
| } |
| |
| } // namespace minikin |