/*
* Copyright (C) 2005, 2007, 2010 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h" | |
#include "break_lines.h" | |
#include "CharacterNames.h" | |
#include "TextBreakIterator.h" | |
#if PLATFORM(MAC) | |
#include <CoreServices/CoreServices.h> | |
#endif | |
namespace WebCore { | |
// Returns true when |ch| is a whitespace character at which a line may be
// broken: space, line feed or tab. A no-break space is treated as breakable
// only when the caller passes treatNoBreakSpaceAsBreak.
static inline bool isBreakableSpace(UChar ch, bool treatNoBreakSpaceAsBreak)
{
    switch (ch) {
    case ' ':
    case '\n':
    case '\t':
        return true;
    case noBreakSpace:
        return treatNoBreakSpaceAsBreak;
    default:
        return false;
    }
}
// Lookup table indexed by the ASCII character that follows a question mark:
// a non-zero entry means a line break is allowed between the '?' and that
// character.
// This differs from the Unicode algorithm only in that Unicode does not break
// between a question mark and a vertical line (U+007C).
static const unsigned char internetExplorerLineBreaksAfterQuestionMarkTable[0x80] = {
    1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, // 0x00-0x0F: \t
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10-0x1F
    1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, // 0x20-0x2F: ! " ' ) , . /
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, // 0x30-0x3F: : ; ?
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40-0x4F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, // 0x50-0x5F: ]
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60-0x6F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1  // 0x70-0x7F: }
};

// Number of entries in the table above; used to bounds-check the lookup.
static const size_t internetExplorerLineBreaksAfterQuestionMarkTableSize = sizeof(internetExplorerLineBreaksAfterQuestionMarkTable) / sizeof(*internetExplorerLineBreaksAfterQuestionMarkTable);
// Returns true when a line break is allowed between |ch| and the character
// |nextCh| that follows it, based solely on this pair of characters.
static inline bool shouldBreakAfter(UChar ch, UChar nextCh)
{
    switch (ch) {
    // For a question mark preceding a non-ASCII character, defer to the Unicode algorithm by returning false.
    // For ASCII characters, use a lookup table for enhanced speed and for compatibility with Internet Explorer.
    case '?':
        return nextCh < internetExplorerLineBreaksAfterQuestionMarkTableSize && internetExplorerLineBreaksAfterQuestionMarkTable[nextCh];
    // Internet Explorer always allows breaking after a hyphen.
    case '-':
    case softHyphen:
    // FIXME: cases for ideographicComma and ideographicFullStop are a workaround for an issue in Unicode 5.0
    // which is likely to be resolved in Unicode 5.1 <http://bugs.webkit.org/show_bug.cgi?id=17411>.
    // We may want to remove or conditionalize this workaround at some point.
    case ideographicComma:
    case ideographicFullStop:
        return true;
    default:
        return false;
    }
}
// Returns true for characters outside the ASCII range (above 0x7F) other than
// the no-break space. nextBreakablePosition() consults the platform line
// break iterator only when this holds for the current or previous character.
static inline bool needsLineBreakIterator(UChar ch)
{
    return ch > 0x7F && ch != noBreakSpace;
}
#if PLATFORM(MAC) && defined(BUILDING_ON_TIGER)
// Creates a text break locator configured for line breaks
// (kUCTextBreakLineMask). The status returned by UCCreateTextBreakLocator is
// not checked; if the call leaves |locator| untouched, 0 is returned and the
// caller must handle that.
static inline TextBreakLocatorRef lineBreakLocator()
{
    TextBreakLocatorRef locator = 0;
    UCCreateTextBreakLocator(0, 0, kUCTextBreakLineMask, &locator);
    return locator;
}
#endif
int nextBreakablePosition(const UChar* str, int pos, int len, bool treatNoBreakSpaceAsBreak) | |
{ | |
#if !PLATFORM(MAC) || !defined(BUILDING_ON_TIGER) | |
TextBreakIterator* breakIterator = 0; | |
#endif | |
int nextBreak = -1; | |
UChar lastCh = pos > 0 ? str[pos - 1] : 0; | |
for (int i = pos; i < len; i++) { | |
UChar ch = str[i]; | |
if (isBreakableSpace(ch, treatNoBreakSpaceAsBreak) || shouldBreakAfter(lastCh, ch)) | |
return i; | |
if (needsLineBreakIterator(ch) || needsLineBreakIterator(lastCh)) { | |
if (nextBreak < i && i) { | |
#if !PLATFORM(MAC) || !defined(BUILDING_ON_TIGER) | |
if (!breakIterator) | |
breakIterator = lineBreakIterator(str, len); | |
if (breakIterator) | |
nextBreak = textBreakFollowing(breakIterator, i - 1); | |
#else | |
static TextBreakLocatorRef breakLocator = lineBreakLocator(); | |
if (breakLocator) { | |
UniCharArrayOffset nextUCBreak; | |
if (UCFindTextBreak(breakLocator, kUCTextBreakLineMask, 0, str, len, i, &nextUCBreak) == 0) | |
nextBreak = nextUCBreak; | |
} | |
#endif | |
} | |
if (i == nextBreak && !isBreakableSpace(lastCh, treatNoBreakSpaceAsBreak)) | |
return i; | |
} | |
lastCh = ch; | |
} | |
return len; | |
} | |
} // namespace WebCore |