blob: f6952a93cdb116770146f50fb61e613a87e5e1a2 [file] [log] [blame]
/*
* Copyright (C) 2017 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define LOG_TAG "GreedyLineBreak"
#include "minikin/Characters.h"
#include "minikin/LineBreaker.h"
#include "minikin/MeasuredText.h"
#include "minikin/Range.h"
#include "minikin/U16StringPiece.h"
#include "HyphenatorMap.h"
#include "LineBreakerUtil.h"
#include "Locale.h"
#include "LocaleListCache.h"
#include "WordBreaker.h"
namespace minikin {
namespace {
constexpr uint32_t NOWHERE = 0xFFFFFFFF;
class GreedyLineBreaker {
public:
// User of this class must keep measured, lineWidthLimit, tabStop alive until the instance is
// destructed.
GreedyLineBreaker(const U16StringPiece& textBuf, const MeasuredText& measured,
const LineWidth& lineWidthLimits, const TabStops& tabStops,
bool enableHyphenation)
: mLineWidthLimit(lineWidthLimits.getAt(0)),
mTextBuf(textBuf),
mMeasuredText(measured),
mLineWidthLimits(lineWidthLimits),
mTabStops(tabStops),
mEnableHyphenation(enableHyphenation) {}
void process();
LineBreakResult getResult() const;
private:
struct BreakPoint {
BreakPoint(uint32_t offset, float lineWidth, StartHyphenEdit startHyphen,
EndHyphenEdit endHyphen)
: offset(offset),
lineWidth(lineWidth),
hyphenEdit(packHyphenEdit(startHyphen, endHyphen)) {}
uint32_t offset;
float lineWidth;
HyphenEdit hyphenEdit;
};
inline uint32_t getPrevLineBreakOffset() {
return mBreakPoints.empty() ? 0 : mBreakPoints.back().offset;
}
// Registers the break point and prepares for next line computation.
void breakLineAt(uint32_t offset, float lineWidth, float remainingNextLineWidth,
float remainingNextSumOfCharWidths, EndHyphenEdit thisLineEndHyphen,
StartHyphenEdit nextLineStartHyphen);
// Update current line width.
void updateLineWidth(uint16_t c, float width);
// Break line if current line exceeds the line limit.
void processLineBreak(uint32_t offset, WordBreaker* breaker, bool doHyphenation);
// Try to break with previous word boundary.
// Returns false if unable to break by word boundary.
bool tryLineBreakWithWordBreak();
// Try to break with hyphenation.
// Returns false if unable to hyphenate.
//
// This method keeps hyphenation until the line width after line break meets the line width
// limit.
bool tryLineBreakWithHyphenation(const Range& range, WordBreaker* breaker);
// Do line break with each characters.
//
// This method only breaks at the first offset which has the longest width for the line width
// limit. This method don't keep line breaking even if the rest of the word exceeds the line
// width limit.
// This method return true if there is no characters to be processed.
bool doLineBreakWithGraphemeBounds(const Range& range);
// Info about the line currently processing.
uint32_t mLineNum = 0;
double mLineWidth = 0;
double mSumOfCharWidths = 0;
double mLineWidthLimit;
StartHyphenEdit mStartHyphenEdit = StartHyphenEdit::NO_EDIT;
// Previous word break point info.
uint32_t mPrevWordBoundsOffset = NOWHERE;
double mLineWidthAtPrevWordBoundary = 0;
double mSumOfCharWidthsAtPrevWordBoundary = 0;
bool mIsPrevWordBreakIsInEmailOrUrl = false;
// The hyphenator currently used.
const Hyphenator* mHyphenator = nullptr;
// Input parameters.
const U16StringPiece& mTextBuf;
const MeasuredText& mMeasuredText;
const LineWidth& mLineWidthLimits;
const TabStops& mTabStops;
bool mEnableHyphenation;
// The result of line breaking.
std::vector<BreakPoint> mBreakPoints;
MINIKIN_PREVENT_COPY_ASSIGN_AND_MOVE(GreedyLineBreaker);
};
void GreedyLineBreaker::breakLineAt(uint32_t offset, float lineWidth, float remainingNextLineWidth,
float remainingNextSumOfCharWidths,
EndHyphenEdit thisLineEndHyphen,
StartHyphenEdit nextLineStartHyphen) {
// First, push the break to result.
mBreakPoints.emplace_back(offset, lineWidth, mStartHyphenEdit, thisLineEndHyphen);
// Update the current line info.
mLineWidthLimit = mLineWidthLimits.getAt(++mLineNum);
mLineWidth = remainingNextLineWidth;
mSumOfCharWidths = remainingNextSumOfCharWidths;
mStartHyphenEdit = nextLineStartHyphen;
mPrevWordBoundsOffset = NOWHERE;
mLineWidthAtPrevWordBoundary = 0;
mSumOfCharWidthsAtPrevWordBoundary = 0;
mIsPrevWordBreakIsInEmailOrUrl = false;
}
bool GreedyLineBreaker::tryLineBreakWithWordBreak() {
if (mPrevWordBoundsOffset == NOWHERE) {
return false; // No word break point before..
}
breakLineAt(mPrevWordBoundsOffset, // break offset
mLineWidthAtPrevWordBoundary, // line width
mLineWidth - mSumOfCharWidthsAtPrevWordBoundary, // remaining next line width
// remaining next sum of char widths.
mSumOfCharWidths - mSumOfCharWidthsAtPrevWordBoundary, EndHyphenEdit::NO_EDIT,
StartHyphenEdit::NO_EDIT); // No hyphen modification.
return true;
}
bool GreedyLineBreaker::tryLineBreakWithHyphenation(const Range& range, WordBreaker* breaker) {
if (!mEnableHyphenation || mHyphenator == nullptr) {
return false;
}
Run* targetRun = nullptr;
for (const auto& run : mMeasuredText.runs) {
if (run->getRange().contains(range)) {
targetRun = run.get();
}
}
if (targetRun == nullptr) {
return false; // The target range may lay on multiple run. Unable to hyphenate.
}
const Range targetRange = breaker->wordRange();
if (!range.contains(targetRange)) {
return false;
}
const std::vector<HyphenationType> hyphenResult =
hyphenate(mTextBuf.substr(targetRange), *mHyphenator);
Range contextRange = range;
uint32_t prevOffset = NOWHERE;
float prevWidth = 0;
// Look up the hyphenation point from the begining.
for (uint32_t i = targetRange.getStart(); i < targetRange.getEnd(); ++i) {
const HyphenationType hyph = hyphenResult[targetRange.toRangeOffset(i)];
if (hyph == HyphenationType::DONT_BREAK) {
continue; // Not a hyphenation point.
}
const float width =
targetRun->measureHyphenPiece(mTextBuf, contextRange.split(i).first,
mStartHyphenEdit, editForThisLine(hyph), nullptr);
if (width <= mLineWidthLimit) {
// There are still space, remember current offset and look up next hyphenation point.
prevOffset = i;
prevWidth = width;
continue;
}
if (prevOffset == NOWHERE) {
// Even with hyphenation, the piece is too long for line. Give up and break in
// character bounds.
doLineBreakWithGraphemeBounds(contextRange);
} else {
// Previous offset is the longest hyphenation piece. Break with it.
const HyphenationType hyph = hyphenResult[targetRange.toRangeOffset(prevOffset)];
const StartHyphenEdit nextLineStartHyphenEdit = editForNextLine(hyph);
const float remainingCharWidths = targetRun->measureHyphenPiece(
mTextBuf, contextRange.split(prevOffset).second, nextLineStartHyphenEdit,
EndHyphenEdit::NO_EDIT, nullptr);
breakLineAt(prevOffset, prevWidth,
remainingCharWidths - (mSumOfCharWidths - mLineWidth), remainingCharWidths,
editForThisLine(hyph), nextLineStartHyphenEdit);
}
if (mLineWidth <= mLineWidthLimit) {
// The remaining hyphenation piece is less than line width. No more hyphenation is
// needed. Go to next word.
return true;
}
// Even after line break, the remaining hyphenation piece is still too long for the limit.
// Keep hyphenating for the rest.
i = getPrevLineBreakOffset();
contextRange.setStart(i); // Update the hyphenation start point.
prevOffset = NOWHERE;
}
// Do the same line break at the end of text.
// TODO: Remove code duplication. This is the same as in the for loop but extracting function
// may not clear.
if (prevOffset == NOWHERE) {
doLineBreakWithGraphemeBounds(contextRange);
} else {
const HyphenationType hyph = hyphenResult[targetRange.toRangeOffset(prevOffset)];
const StartHyphenEdit nextLineStartHyphenEdit = editForNextLine(hyph);
const float remainingCharWidths = targetRun->measureHyphenPiece(
mTextBuf, contextRange.split(prevOffset).second, nextLineStartHyphenEdit,
EndHyphenEdit::NO_EDIT, nullptr);
breakLineAt(prevOffset, prevWidth, remainingCharWidths - (mSumOfCharWidths - mLineWidth),
remainingCharWidths, editForThisLine(hyph), nextLineStartHyphenEdit);
}
return true;
}
// TODO: Respect trailing line end spaces.
bool GreedyLineBreaker::doLineBreakWithGraphemeBounds(const Range& range) {
double width = mMeasuredText.widths[range.getStart()];
// Starting from + 1 since at least one character needs to be assigned to a line.
for (uint32_t i = range.getStart() + 1; i < range.getEnd(); ++i) {
const float w = mMeasuredText.widths[i];
if (w == 0) {
continue; // w == 0 means here is not a grapheme bounds. Don't break here.
}
if (width + w > mLineWidthLimit) {
// Okay, here is the longest position.
breakLineAt(i, width, mLineWidth - width, mSumOfCharWidths - width,
EndHyphenEdit::NO_EDIT, StartHyphenEdit::NO_EDIT);
// This method only breaks at the first longest offset, since we may want to hyphenate
// the rest of the word.
return false;
} else {
width += w;
}
}
// Reaching here means even one character (or cluster) doesn't fit the line.
// Give up and break at the end of this range.
breakLineAt(range.getEnd(), mLineWidth, 0, 0, EndHyphenEdit::NO_EDIT, StartHyphenEdit::NO_EDIT);
return true;
}
void GreedyLineBreaker::updateLineWidth(uint16_t c, float width) {
if (c == CHAR_TAB) {
mSumOfCharWidths = mTabStops.nextTab(mSumOfCharWidths);
mLineWidth = mSumOfCharWidths;
} else {
mSumOfCharWidths += width;
if (!isLineEndSpace(c)) {
mLineWidth = mSumOfCharWidths;
}
}
}
void GreedyLineBreaker::processLineBreak(uint32_t offset, WordBreaker* breaker,
bool doHyphenation) {
while (mLineWidth > mLineWidthLimit) {
const Range lineRange(getPrevLineBreakOffset(), offset); // The range we need to address.
if (tryLineBreakWithWordBreak()) {
continue; // The word in the new line may still be too long for the line limit.
} else if (doHyphenation && tryLineBreakWithHyphenation(lineRange, breaker)) {
continue; // TODO: we may be able to return here.
} else {
if (doLineBreakWithGraphemeBounds(lineRange)) {
return;
}
}
}
// There is still spaces, remember current word break point as a candidate and wait next word.
const bool isInEmailOrUrl = breaker->breakBadness() != 0;
if (mPrevWordBoundsOffset == NOWHERE || mIsPrevWordBreakIsInEmailOrUrl | !isInEmailOrUrl) {
mPrevWordBoundsOffset = offset;
mLineWidthAtPrevWordBoundary = mLineWidth;
mSumOfCharWidthsAtPrevWordBoundary = mSumOfCharWidths;
mIsPrevWordBreakIsInEmailOrUrl = isInEmailOrUrl;
}
}
void GreedyLineBreaker::process() {
WordBreaker wordBreaker;
wordBreaker.setText(mTextBuf.data(), mTextBuf.size());
// Following two will be initialized after the first iteration.
uint32_t localeListId = LocaleListCache::kInvalidListId;
uint32_t nextWordBoundaryOffset = 0;
for (const auto& run : mMeasuredText.runs) {
const Range range = run->getRange();
// Update locale if necessary.
uint32_t newLocaleListId = run->getLocaleListId();
if (localeListId != newLocaleListId) {
Locale locale = getEffectiveLocale(newLocaleListId);
nextWordBoundaryOffset = wordBreaker.followingWithLocale(locale, range.getStart());
mHyphenator = HyphenatorMap::lookup(locale);
localeListId = newLocaleListId;
}
for (uint32_t i = range.getStart(); i < range.getEnd(); ++i) {
updateLineWidth(mTextBuf[i], mMeasuredText.widths[i]);
if ((i + 1) == nextWordBoundaryOffset) {
// Only process line break at word boundary and the run can break into some pieces.
if (run->canBreak() || nextWordBoundaryOffset == range.getEnd()) {
processLineBreak(i + 1, &wordBreaker, run->canBreak());
}
nextWordBoundaryOffset = wordBreaker.next();
}
}
}
if (getPrevLineBreakOffset() != mTextBuf.size() && mPrevWordBoundsOffset != NOWHERE) {
// The remaining words in the last line.
breakLineAt(mPrevWordBoundsOffset, mLineWidth, 0, 0, EndHyphenEdit::NO_EDIT,
StartHyphenEdit::NO_EDIT);
}
}
LineBreakResult GreedyLineBreaker::getResult() const {
constexpr int TAB_BIT = 1 << 29; // Must be the same in StaticLayout.java
LineBreakResult out;
uint32_t prevBreakOffset = 0;
for (const auto& breakPoint : mBreakPoints) {
// TODO: compute these during line breaking if these takes longer time.
bool hasTabChar = false;
for (uint32_t i = prevBreakOffset; i < breakPoint.offset; ++i) {
hasTabChar |= mTextBuf[i] == CHAR_TAB;
}
MinikinExtent extent =
mMeasuredText.getExtent(mTextBuf, Range(prevBreakOffset, breakPoint.offset));
out.breakPoints.push_back(breakPoint.offset);
out.widths.push_back(breakPoint.lineWidth);
out.ascents.push_back(extent.ascent);
out.descents.push_back(extent.descent);
out.flags.push_back((hasTabChar ? TAB_BIT : 0) | static_cast<int>(breakPoint.hyphenEdit));
prevBreakOffset = breakPoint.offset;
}
return out;
}
} // namespace
LineBreakResult breakLineGreedy(const U16StringPiece& textBuf, const MeasuredText& measured,
const LineWidth& lineWidthLimits, const TabStops& tabStops,
bool enableHyphenation) {
if (textBuf.size() == 0) {
return LineBreakResult();
}
GreedyLineBreaker lineBreaker(textBuf, measured, lineWidthLimits, tabStops, enableHyphenation);
lineBreaker.process();
return lineBreaker.getResult();
}
} // namespace minikin