Source/WebCore/platform/text/TextBreakIteratorICU.cpp - platform/external/chromium_org/third_party/WebKit - Git at Google

 /*
  * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
  * Copyright (C) 2007, 2011, 2012 Apple Inc. All rights reserved.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Library General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This library is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Library General Public License for more details.
  *
  * You should have received a copy of the GNU Library General Public License
  * along with this library; see the file COPYING.LIB.  If not, write to
  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  * Boston, MA 02110-1301, USA.
  *
  */

 #include "config.h"
 #include "TextBreakIterator.h"

 #include "LineBreakIteratorPoolICU.h"
 #include <wtf/Atomics.h>
 #include <wtf/text/WTFString.h>

 using namespace WTF;
 using namespace std;

 namespace WebCore {

 // Binds |string| (|length| UTF-16 code units) to a lazily opened ICU break iterator of
 // kind |type| and returns it. Returns 0 when |string| is null, when no iterator is
 // available, or when ubrk_setText() reports a failure.
 // |createdIterator| and |iterator| are caller-owned state: the iterator is opened only
 // while |createdIterator| is false, and the flag is set even if the open fails, so a
 // failed open is not attempted again through the same pair.
 static TextBreakIterator* setUpIterator(bool& createdIterator, TextBreakIterator*& iterator,
     UBreakIteratorType type, const UChar* string, int length)
 {
     if (!string)
         return 0;

     if (!createdIterator) {
         UErrorCode status = U_ZERO_ERROR;
         UBreakIterator* opened = ubrk_open(type, currentTextBreakLocaleID(), 0, 0, &status);
         iterator = reinterpret_cast<TextBreakIterator*>(opened);
         createdIterator = true;
         ASSERT_WITH_MESSAGE(U_SUCCESS(status), "ICU could not open a break iterator: %s (%d)", u_errorName(status), status);
     }

     if (!iterator)
         return 0;

     UErrorCode status = U_ZERO_ERROR;
     ubrk_setText(reinterpret_cast<UBreakIterator*>(iterator), string, length, &status);
     return U_FAILURE(status) ? 0 : iterator;
 }

 // Number of characters the Latin-1 UText provider exposes per chunk. The buffers sized
 // from this constant reserve one additional UChar.
 static const int s_UTextCharacterBufferSize = 16;

 // A UText bundled with the storage for its chunk buffer. acquireLineBreakIterator()
 // points uTextStruct.pExtra at uCharBuffer before opening the text.
 typedef struct {
     UText uTextStruct;
     UChar uCharBuffer[s_UTextCharacterBufferSize + 1];
 } UTextWithBuffer;

 // Pristine UText value copied into a UTextWithBuffer before it is set up.
 static UText emptyUText = UTEXT_INITIALIZER;

 // Callbacks of the Latin-1 UText provider; definitions follow the function table.
 static UText* uTextLatin1Clone(UText*, const UText*, UBool, UErrorCode*);
 static int64_t uTextLatin1NativeLength(UText*);
 static UBool uTextLatin1Access(UText*, int64_t, UBool);
 static int32_t uTextLatin1Extract(UText*, int64_t, int64_t, UChar*, int32_t, UErrorCode*);
 static int64_t uTextLatin1MapOffsetToNative(const UText*);
 static int32_t uTextLatin1MapNativeIndexToUTF16(const UText*, int64_t);
 static void uTextLatin1Close(UText*);

 // Function table installed as UText::pFuncs for Latin-1 (LChar) backed text.
 // The initializer is positional, so the order of entries must not change.
 // NOTE(review): the zero entries presumably correspond to the reserved, replace, copy
 // and spare slots of ICU's UTextFuncs — confirm against utext.h of the ICU in use.
 static struct UTextFuncs uTextLatin1Funcs = {
     sizeof(UTextFuncs),
     0, 0, 0,
     uTextLatin1Clone,
     uTextLatin1NativeLength,
     uTextLatin1Access,
     uTextLatin1Extract,
     0,
     0,
     uTextLatin1MapOffsetToNative,
     uTextLatin1MapNativeIndexToUTF16,
     uTextLatin1Close,
     0, 0, 0
 };

 // Clone callback of the Latin-1 UText provider. Only shallow clones are expected
 // (asserted): the clone shares |source|'s character data (context) and length (a) but
 // gets its own zeroed chunk buffer, positioned at |source|'s chunk start with nothing
 // loaded. Returns 0 if |*status| already holds a failure, and |destination| if
 // utext_setup() fails.
 static UText* uTextLatin1Clone(UText* destination, const UText* source, UBool deep, UErrorCode* status)
 {
     ASSERT_UNUSED(deep, !deep);

     if (U_FAILURE(*status))
         return 0;

     const int32_t bufferBytes = sizeof(UChar) * (s_UTextCharacterBufferSize + 1);
     UText* clone = utext_setup(destination, bufferBytes, status);
     if (U_FAILURE(*status))
         return destination;

     // Shared, immutable state.
     clone->pFuncs = &uTextLatin1Funcs;
     clone->providerProperties = source->providerProperties;
     clone->context = source->context;
     clone->a = source->a;

     // Same position as the source, but with an empty chunk.
     clone->chunkNativeStart = source->chunkNativeStart;
     clone->chunkNativeLimit = source->chunkNativeStart;
     clone->nativeIndexingLimit = static_cast<int32_t>(source->chunkNativeStart);
     clone->chunkOffset = 0;

     // The chunk buffer lives in the extra space that utext_setup() provided.
     UChar* chunkBuffer = (UChar*)clone->pExtra;
     clone->chunkContents = chunkBuffer;
     memset(chunkBuffer, 0, bufferBytes);

     return clone;
 }

 // Native-length callback: the text length is kept in the UText's |a| field, where
 // UTextOpenLatin1() stores it.
 static int64_t uTextLatin1NativeLength(UText* uText)
 {
     const int64_t storedLength = uText->a;
     return storedLength;
 }

 // Access callback of the Latin-1 UText provider: makes the chunk buffer cover |index|.
 //
 // For forward access the chunk must contain the character at |index|; for backward
 // access it must contain the character before |index|. If the current chunk already
 // does, only chunkOffset is updated. Otherwise up to s_UTextCharacterBufferSize Latin-1
 // characters are widened from the backing string (uText->context) into the chunk buffer.
 //
 // Returns FALSE when asked to move forward past the end while the chunk already ends at
 // the text end, or backward from index 0 while the chunk starts at 0; TRUE otherwise.
 //
 // Fixes relative to the previous version:
 //  - After a backward reload, chunkOffset was set from the *previous* chunk's length
 //    because chunkLength was recomputed only afterwards.
 //  - An index outside [0, length] produced a negative chunkLength, which was then passed
 //    to copyChars() as a huge unsigned count. The index is now pinned to the text bounds
 //    before a chunk is loaded.
 static UBool uTextLatin1Access(UText* uText, int64_t index, UBool forward)
 {
     int64_t length = uText->a;

     if (forward) {
         if (index < uText->chunkNativeLimit && index >= uText->chunkNativeStart) {
             /* Already inside the buffer. Set the new offset. */
             uText->chunkOffset = static_cast<int32_t>(index - uText->chunkNativeStart);
             return TRUE;
         }
         if (index >= length && uText->chunkNativeLimit == length) {
             /* Off the end of the buffer, but we can't get it. */
             uText->chunkOffset = uText->chunkLength;
             return FALSE;
         }
     } else {
         if (index <= uText->chunkNativeLimit && index > uText->chunkNativeStart) {
             /* Already inside the buffer. Set the new offset. */
             uText->chunkOffset = static_cast<int32_t>(index - uText->chunkNativeStart);
             return TRUE;
         }
         if (!index && !uText->chunkNativeStart) {
             /* Already at the beginning; can't go any farther */
             uText->chunkOffset = 0;
             return FALSE;
         }
     }

     /* Pin the requested index to the text so the chunk bounds below stay ordered. */
     if (index < 0)
         index = 0;
     else if (index > length)
         index = length;

     if (forward) {
         /* The new chunk starts at the requested index. */
         uText->chunkNativeStart = index;
         uText->chunkNativeLimit = index + s_UTextCharacterBufferSize;
         if (uText->chunkNativeLimit > length)
             uText->chunkNativeLimit = length;
     } else {
         /* The new chunk ends at the requested index. */
         uText->chunkNativeLimit = index;
         uText->chunkNativeStart = index - s_UTextCharacterBufferSize;
         if (uText->chunkNativeStart < 0)
             uText->chunkNativeStart = 0;
     }

     /* Compute the new length first: the backward offset depends on it. */
     uText->chunkLength = static_cast<int32_t>(uText->chunkNativeLimit - uText->chunkNativeStart);
     uText->chunkOffset = forward ? 0 : uText->chunkLength;

     StringImpl::copyChars(const_cast<UChar*>(uText->chunkContents), static_cast<const LChar*>(uText->context) + uText->chunkNativeStart, static_cast<unsigned>(uText->chunkLength));

     uText->nativeIndexingLimit = uText->chunkLength;

     return TRUE;
 }

 // Extract callback of the Latin-1 UText provider: copies the characters in the native
 // range [start, limit) into |dest| as UChars.
 //
 // |start| and |limit| are pinned to the text length. At most |destCapacity| characters
 // are copied. The return value is the full length of the pinned range, which may exceed
 // |destCapacity|.
 // |*status| is set to:
 //  - U_ILLEGAL_ARGUMENT_ERROR    if |destCapacity| is negative, or |dest| is null while
 //                                |destCapacity| is positive;
 //  - U_INDEX_OUTOFBOUNDS_ERROR   if the range is negative, reversed or longer than INT32_MAX;
 //  - U_STRING_NOT_TERMINATED_WARNING if the range exactly fills |dest|;
 //  - U_BUFFER_OVERFLOW_ERROR     if the range does not fit in |dest|.
 // Nothing is done (and 0 is returned) if |*status| already holds a failure or the pinned
 // range is empty.
 //
 // Fix: the copy used to be guarded by `destCapacity > 0 && !dest`, a condition the
 // argument check above had already rejected, so no characters were ever copied.
 static int32_t uTextLatin1Extract(UText* uText, int64_t start, int64_t limit, UChar* dest, int32_t destCapacity, UErrorCode* status)
 {
     int64_t length = uText->a;
     if (U_FAILURE(*status))
         return 0;

     if (destCapacity < 0 || (!dest && destCapacity > 0)) {
         *status = U_ILLEGAL_ARGUMENT_ERROR;
         return 0;
     }

     if (start < 0 || start > limit || (limit - start) > INT32_MAX) {
         *status = U_INDEX_OUTOFBOUNDS_ERROR;
         return 0;
     }

     if (start > length)
         start = length;
     if (limit > length)
         limit = length;

     length = limit - start;

     if (!length)
         return 0;

     // |dest| is known to be non-null whenever |destCapacity| is positive (checked above).
     if (destCapacity > 0 && dest) {
         int32_t trimmedLength = static_cast<int32_t>(length);
         if (trimmedLength > destCapacity)
             trimmedLength = destCapacity;

         StringImpl::copyChars(dest, static_cast<const LChar*>(uText->context) + start, static_cast<unsigned>(trimmedLength));
     }

     if (length < destCapacity) {
         // Room for a terminator: write it and clear a stale "not terminated" warning.
         dest[length] = 0;
         if (*status == U_STRING_NOT_TERMINATED_WARNING)
             *status = U_ZERO_ERROR;
     } else if (length == destCapacity)
         *status = U_STRING_NOT_TERMINATED_WARNING;
     else
         *status = U_BUFFER_OVERFLOW_ERROR;

     return static_cast<int32_t>(length);
 }

 // Converts the current chunk-relative offset into a native index by adding the native
 // index at which the chunk starts.
 static int64_t uTextLatin1MapOffsetToNative(const UText* uText)
 {
     const int64_t chunkStart = uText->chunkNativeStart;
     return chunkStart + uText->chunkOffset;
 }

 // Converts a native index that lies inside the current chunk into an offset relative to
 // the chunk start; this is the inverse of uTextLatin1MapOffsetToNative().
 //
 // Fixes: the lower-bound assertion was inverted (it required the index to lie at or
 // before the chunk start), and the absolute native index was returned instead of the
 // chunk-relative offset that ICU assigns to chunkOffset.
 static int32_t uTextLatin1MapNativeIndexToUTF16(const UText* uText, int64_t nativeIndex)
 {
     ASSERT_UNUSED(uText, uText->chunkNativeStart <= nativeIndex);
     ASSERT_UNUSED(uText, nativeIndex <= uText->chunkNativeLimit);
     return static_cast<int32_t>(nativeIndex - uText->chunkNativeStart);
 }

 // Close callback of the Latin-1 UText provider: drops the reference to the backing
 // character data. Nothing is freed here.
 static void uTextLatin1Close(UText* uText)
 {
     uText->context = 0;
 }

 // Initializes |uTextLatin1| as a UText over the |length| Latin-1 characters at |string|,
 // using the Latin-1 provider function table. The chunk buffer is placed in the UText's
 // extra space and zeroed. Returns the set-up UText, or 0 if utext_setup() fails
 // (|*errorCode| then holds the failure).
 static UText* UTextOpenLatin1(UTextWithBuffer* uTextLatin1, const LChar* string, unsigned length, UErrorCode* errorCode)
 {
     const int32_t bufferBytes = sizeof(UChar) * (s_UTextCharacterBufferSize + 1);

     UText* text = utext_setup(reinterpret_cast<UText*>(uTextLatin1), bufferBytes, errorCode);
     if (U_FAILURE(*errorCode))
         return 0;

     text->pFuncs = &uTextLatin1Funcs;
     text->context = string;
     text->a = static_cast<int64_t>(length);

     UChar* chunkBuffer = (UChar*)text->pExtra;
     text->chunkContents = chunkBuffer;
     memset(chunkBuffer, 0, bufferBytes);

     return text;
 }

 // Returns the function-static word break iterator, pointed at |string|, or 0 on failure
 // (see setUpIterator()). The same iterator object is handed out on every call.
 TextBreakIterator* wordBreakIterator(const UChar* string, int length)
 {
     static bool iteratorCreated = false;
     static TextBreakIterator* sharedIterator;
     return setUpIterator(iteratorCreated, sharedIterator, UBRK_WORD, string, length);
 }

 // Takes a line break iterator for |locale| from the shared pool and points it at the
 // |length| Latin-1 characters in |string| through a stack-allocated Latin-1 UText.
 // Returns 0 if the pool has no iterator or if ICU reports a failure. A successful
 // result must be handed back with releaseLineBreakIterator().
 //
 // Fix: on the failure paths the iterator taken from the pool is now put back (callers
 // receive 0 and therefore cannot release it), and the UText is closed even when
 // ubrk_setUText() fails.
 TextBreakIterator* acquireLineBreakIterator(const LChar* string, int length, const AtomicString& locale)
 {
     UBreakIterator* iterator = LineBreakIteratorPool::sharedPool().take(locale);
     if (!iterator)
         return 0;

     // The UText and its chunk buffer live on the stack; pExtra/extraSize tell
     // utext_setup() about the buffer.
     UTextWithBuffer uTextLatin1Local;
     uTextLatin1Local.uTextStruct = emptyUText;
     uTextLatin1Local.uTextStruct.extraSize = sizeof(uTextLatin1Local.uCharBuffer);
     uTextLatin1Local.uTextStruct.pExtra = uTextLatin1Local.uCharBuffer;

     UErrorCode uTextOpenStatus = U_ZERO_ERROR;
     UText* uTextLatin1 = UTextOpenLatin1(&uTextLatin1Local, string, length, &uTextOpenStatus);
     if (U_FAILURE(uTextOpenStatus)) {
         LOG_ERROR("UTextOpenLatin1 failed with status %d", uTextOpenStatus);
         LineBreakIteratorPool::sharedPool().put(iterator);
         return 0;
     }

     UErrorCode setTextStatus = U_ZERO_ERROR;
     ubrk_setUText(iterator, uTextLatin1, &setTextStatus);

     // The local UText is no longer needed once ubrk_setUText() has returned, whether or
     // not it succeeded.
     utext_close(uTextLatin1);

     if (U_FAILURE(setTextStatus)) {
         LOG_ERROR("ubrk_setUText failed with status %d", setTextStatus);
         LineBreakIteratorPool::sharedPool().put(iterator);
         return 0;
     }

     return reinterpret_cast<TextBreakIterator*>(iterator);
 }

 // Takes a line break iterator for |locale| from the shared pool and points it at the
 // |length| UTF-16 code units in |string|. Returns 0 if the pool has no iterator or if
 // ubrk_setText() fails. A successful result must be handed back with
 // releaseLineBreakIterator().
 //
 // Fix: when ubrk_setText() fails the iterator is now put back into the pool; callers
 // receive 0 and therefore cannot release it themselves.
 TextBreakIterator* acquireLineBreakIterator(const UChar* string, int length, const AtomicString& locale)
 {
     UBreakIterator* iterator = LineBreakIteratorPool::sharedPool().take(locale);
     if (!iterator)
         return 0;

     UErrorCode setTextStatus = U_ZERO_ERROR;
     ubrk_setText(iterator, string, length, &setTextStatus);
     if (U_FAILURE(setTextStatus)) {
         LOG_ERROR("ubrk_setText failed with status %d", setTextStatus);
         LineBreakIteratorPool::sharedPool().put(iterator);
         return 0;
     }

     return reinterpret_cast<TextBreakIterator*>(iterator);
 }

 // Hands an iterator obtained from acquireLineBreakIterator() back to the shared pool.
 // |iterator| must not be null (asserted).
 void releaseLineBreakIterator(TextBreakIterator* iterator)
 {
     ASSERT_ARG(iterator, iterator);

     UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
     LineBreakIteratorPool::sharedPool().put(icuIterator);
 }

 // File-local slot holding at most one idle character break iterator that
 // NonSharedCharacterBreakIterator instances hand over to each other.
 static TextBreakIterator* nonSharedCharacterBreakIterator;

 // Replaces the slot's value with |newValue| if it currently equals |expected| and
 // reports whether the replacement happened. Uses weakCompareAndSwap() when
 // ENABLE(COMPARE_AND_SWAP) is set, and a mutex-protected compare-then-store otherwise.
 static inline bool compareAndSwapNonSharedCharacterBreakIterator(TextBreakIterator* expected, TextBreakIterator* newValue)
 {
 #if ENABLE(COMPARE_AND_SWAP)
     void** slot = reinterpret_cast<void**>(&nonSharedCharacterBreakIterator);
     return weakCompareAndSwap(slot, expected, newValue);
 #else
     DEFINE_STATIC_LOCAL(Mutex, nonSharedCharacterBreakIteratorMutex, ());
     MutexLocker locker(nonSharedCharacterBreakIteratorMutex);
     if (nonSharedCharacterBreakIterator == expected) {
         nonSharedCharacterBreakIterator = newValue;
         return true;
     }
     return false;
 #endif
 }

 // Tries to claim the idle iterator from the file-local slot (by swapping the slot to 0).
 // If there was none, or the swap did not succeed, setUpIterator() opens a new character
 // break iterator. m_iterator ends up 0 when setUpIterator() fails.
 NonSharedCharacterBreakIterator::NonSharedCharacterBreakIterator(const UChar* buffer, int length)
 {
     m_iterator = nonSharedCharacterBreakIterator;

     bool claimedIdleIterator = false;
     if (m_iterator)
         claimedIdleIterator = compareAndSwapNonSharedCharacterBreakIterator(m_iterator, 0);

     m_iterator = setUpIterator(claimedIdleIterator, m_iterator, UBRK_CHARACTER, buffer, length);
 }

 // Parks the iterator in the file-local slot if the slot is empty; otherwise closes it.
 NonSharedCharacterBreakIterator::~NonSharedCharacterBreakIterator()
 {
     const bool parkedForReuse = compareAndSwapNonSharedCharacterBreakIterator(0, m_iterator);
     if (parkedForReuse)
         return;

     ubrk_close(reinterpret_cast<UBreakIterator*>(m_iterator));
 }

 // Returns the function-static sentence break iterator, pointed at |string|, or 0 on
 // failure (see setUpIterator()). The same iterator object is handed out on every call.
 TextBreakIterator* sentenceBreakIterator(const UChar* string, int length)
 {
     static bool iteratorCreated = false;
     static TextBreakIterator* sharedIterator;
     return setUpIterator(iteratorCreated, sharedIterator, UBRK_SENTENCE, string, length);
 }

 // Forwards to ubrk_first() on the underlying ICU iterator and returns its result.
 int textBreakFirst(TextBreakIterator* iterator)
 {
     UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
     return ubrk_first(icuIterator);
 }

 // Forwards to ubrk_last() on the underlying ICU iterator and returns its result.
 int textBreakLast(TextBreakIterator* iterator)
 {
     UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
     return ubrk_last(icuIterator);
 }

 // Forwards to ubrk_next() on the underlying ICU iterator and returns its result.
 int textBreakNext(TextBreakIterator* iterator)
 {
     UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
     return ubrk_next(icuIterator);
 }

 // Forwards to ubrk_previous() on the underlying ICU iterator and returns its result.
 int textBreakPrevious(TextBreakIterator* iterator)
 {
     UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
     return ubrk_previous(icuIterator);
 }

 // Forwards to ubrk_preceding() with |pos| on the underlying ICU iterator and returns
 // its result.
 int textBreakPreceding(TextBreakIterator* iterator, int pos)
 {
     UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
     return ubrk_preceding(icuIterator, pos);
 }

 // Forwards to ubrk_following() with |pos| on the underlying ICU iterator and returns
 // its result.
 int textBreakFollowing(TextBreakIterator* iterator, int pos)
 {
     UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
     return ubrk_following(icuIterator, pos);
 }

 // Forwards to ubrk_current() on the underlying ICU iterator and returns its result.
 int textBreakCurrent(TextBreakIterator* iterator)
 {
     UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
     return ubrk_current(icuIterator);
 }

 // Forwards to ubrk_isBoundary() with |position| on the underlying ICU iterator and
 // returns its result as a bool.
 bool isTextBreak(TextBreakIterator* iterator, int position)
 {
     UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
     return ubrk_isBoundary(icuIterator, position);
 }

 // Returns true unless the iterator's current rule status is UBRK_WORD_NONE.
 bool isWordTextBreak(TextBreakIterator* iterator)
 {
     UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
     return ubrk_getRuleStatus(icuIterator) != UBRK_WORD_NONE;
 }

 // Same contract as setUpIterator(), except that the iterator is compiled from the rule
 // source |breakRules| with ubrk_openRules() instead of being opened by type and locale.
 // Returns 0 when |string| is null, when no iterator is available, or when
 // ubrk_setText() fails. |createdIterator| is set even if compiling the rules fails.
 static TextBreakIterator* setUpIteratorWithRules(bool& createdIterator, TextBreakIterator*& iterator,
     const char* breakRules, const UChar* string, int length)
 {
     if (!string)
         return 0;

     if (!createdIterator) {
         String ruleText(breakRules);
         UParseError parseError;
         UErrorCode status = U_ZERO_ERROR;
         UBreakIterator* opened = ubrk_openRules(ruleText.characters(), ruleText.length(), 0, 0, &parseError, &status);
         iterator = reinterpret_cast<TextBreakIterator*>(opened);
         createdIterator = true;
         ASSERT_WITH_MESSAGE(U_SUCCESS(status), "ICU could not open a break iterator: %s (%d)", u_errorName(status), status);
     }

     if (!iterator)
         return 0;

     UErrorCode status = U_ZERO_ERROR;
     ubrk_setText(reinterpret_cast<UBreakIterator*>(iterator), string, length, &status);
     return U_FAILURE(status) ? 0 : iterator;
 }

 // Returns the function-static break iterator used for cursor movement, pointed at
 // |string| (|length| UTF-16 code units), or 0 on failure (see setUpIteratorWithRules()).
 // The iterator is compiled from the custom rule set below on first use.
 //
 // Fix: $Tel1 started at U+0C14 (Telugu letter AU) although it is documented as, and
 // every sibling script uses, the consonant range Ka..Ha; it now starts at U+0C15 (KA).
 TextBreakIterator* cursorMovementIterator(const UChar* string, int length)
 {
     // This rule set is based on character-break iterator rules of ICU 4.0
     // <http://source.icu-project.org/repos/icu/icu/tags/release-4-0/source/data/brkitr/char.txt>.
     // The major differences from the original ones are listed below:
     // * Replaced '[\p{Grapheme_Cluster_Break = SpacingMark}]' with '[\p{General_Category = Spacing Mark} - $Extend]' for ICU 3.8 or earlier;
     // * Removed rules that prevent a cursor from moving after prepend characters (Bug 24342);
     // * Added rules that prevent a cursor from moving after virama signs of Indic languages except Tamil (Bug 15790);
     // * Added rules that prevent a cursor from moving before Japanese half-width katakana voiced marks; and
     // * Added rules for regional indicator symbols.
     static const char* kRules =
         "$CR      = [\\p{Grapheme_Cluster_Break = CR}];"
         "$LF      = [\\p{Grapheme_Cluster_Break = LF}];"
         "$Control = [\\p{Grapheme_Cluster_Break = Control}];"
         "$VoiceMarks = [\\uFF9E\\uFF9F];"  // Japanese half-width katakana voiced marks
         "$Extend  = [\\p{Grapheme_Cluster_Break = Extend} $VoiceMarks - [\\u0E30 \\u0E32 \\u0E45 \\u0EB0 \\u0EB2]];"
         "$SpacingMark = [[\\p{General_Category = Spacing Mark}] - $Extend];"
         "$L       = [\\p{Grapheme_Cluster_Break = L}];"
         "$V       = [\\p{Grapheme_Cluster_Break = V}];"
         "$T       = [\\p{Grapheme_Cluster_Break = T}];"
         "$LV      = [\\p{Grapheme_Cluster_Break = LV}];"
         "$LVT     = [\\p{Grapheme_Cluster_Break = LVT}];"
         "$Hin0    = [\\u0905-\\u0939];"    // Devanagari Letter A,...,Ha
         "$HinV    = \\u094D;"              // Devanagari Sign Virama
         "$Hin1    = [\\u0915-\\u0939];"    // Devanagari Letter Ka,...,Ha
         "$Ben0    = [\\u0985-\\u09B9];"    // Bengali Letter A,...,Ha
         "$BenV    = \\u09CD;"              // Bengali Sign Virama
         "$Ben1    = [\\u0995-\\u09B9];"    // Bengali Letter Ka,...,Ha
         "$Pan0    = [\\u0A05-\\u0A39];"    // Gurmukhi Letter A,...,Ha
         "$PanV    = \\u0A4D;"              // Gurmukhi Sign Virama
         "$Pan1    = [\\u0A15-\\u0A39];"    // Gurmukhi Letter Ka,...,Ha
         "$Guj0    = [\\u0A85-\\u0AB9];"    // Gujarati Letter A,...,Ha
         "$GujV    = \\u0ACD;"              // Gujarati Sign Virama
         "$Guj1    = [\\u0A95-\\u0AB9];"    // Gujarati Letter Ka,...,Ha
         "$Ori0    = [\\u0B05-\\u0B39];"    // Oriya Letter A,...,Ha
         "$OriV    = \\u0B4D;"              // Oriya Sign Virama
         "$Ori1    = [\\u0B15-\\u0B39];"    // Oriya Letter Ka,...,Ha
         "$Tel0    = [\\u0C05-\\u0C39];"    // Telugu Letter A,...,Ha
         "$TelV    = \\u0C4D;"              // Telugu Sign Virama
         "$Tel1    = [\\u0C15-\\u0C39];"    // Telugu Letter Ka,...,Ha
         "$Kan0    = [\\u0C85-\\u0CB9];"    // Kannada Letter A,...,Ha
         "$KanV    = \\u0CCD;"              // Kannada Sign Virama
         "$Kan1    = [\\u0C95-\\u0CB9];"    // Kannada Letter Ka,...,Ha
         "$Mal0    = [\\u0D05-\\u0D39];"    // Malayalam Letter A,...,Ha
         "$MalV    = \\u0D4D;"              // Malayalam Sign Virama
         "$Mal1    = [\\u0D15-\\u0D39];"    // Malayalam Letter Ka,...,Ha
         "$RI      = [\\U0001F1E6-\\U0001F1FF];" // Emoji regional indicators
         "!!chain;"
         "!!forward;"
         "$CR $LF;"
         "$L ($L | $V | $LV | $LVT);"
         "($LV | $V) ($V | $T);"
         "($LVT | $T) $T;"
         "[^$Control $CR $LF] $Extend;"
         "[^$Control $CR $LF] $SpacingMark;"
         "$RI $RI / $RI;"
         "$RI $RI;"
         "$Hin0 $HinV $Hin1;"               // Devanagari Virama (forward)
         "$Ben0 $BenV $Ben1;"               // Bengali Virama (forward)
         "$Pan0 $PanV $Pan1;"               // Gurmukhi Virama (forward)
         "$Guj0 $GujV $Guj1;"               // Gujarati Virama (forward)
         "$Ori0 $OriV $Ori1;"               // Oriya Virama (forward)
         "$Tel0 $TelV $Tel1;"               // Telugu Virama (forward)
         "$Kan0 $KanV $Kan1;"               // Kannada Virama (forward)
         "$Mal0 $MalV $Mal1;"               // Malayalam Virama (forward)
         "!!reverse;"
         "$LF $CR;"
         "($L | $V | $LV | $LVT) $L;"
         "($V | $T) ($LV | $V);"
         "$T ($LVT | $T);"
         "$Extend      [^$Control $CR $LF];"
         "$SpacingMark [^$Control $CR $LF];"
         "$RI $RI / $RI $RI;"
         "$RI $RI;"
         "$Hin1 $HinV $Hin0;"               // Devanagari Virama (backward)
         "$Ben1 $BenV $Ben0;"               // Bengali Virama (backward)
         "$Pan1 $PanV $Pan0;"               // Gurmukhi Virama (backward)
         "$Guj1 $GujV $Guj0;"               // Gujarati Virama (backward)
         "$Ori1 $OriV $Ori0;"               // Oriya Virama (backward)
         "$Tel1 $TelV $Tel0;"               // Telugu Virama (backward)
         "$Kan1 $KanV $Kan0;"               // Kannada Virama (backward)
         "$Mal1 $MalV $Mal0;"               // Malayalam Virama (backward)
         "!!safe_reverse;"
         "!!safe_forward;";
     static bool createdCursorMovementIterator = false;
     static TextBreakIterator* staticCursorMovementIterator;
     return setUpIteratorWithRules(createdCursorMovementIterator, staticCursorMovementIterator, kRules, string, length);
 }

 }
	/*
	* Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
	* Copyright (C) 2007, 2011, 2012 Apple Inc. All rights reserved.
	*
	* This library is free software; you can redistribute it and/or
	* modify it under the terms of the GNU Library General Public
	* License as published by the Free Software Foundation; either
	* version 2 of the License, or (at your option) any later version.
	*
	* This library is distributed in the hope that it will be useful,
	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	* Library General Public License for more details.
	*
	* You should have received a copy of the GNU Library General Public License
	* along with this library; see the file COPYING.LIB. If not, write to
	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
	* Boston, MA 02110-1301, USA.
	*
	*/

	#include "config.h"
	#include "TextBreakIterator.h"

	#include "LineBreakIteratorPoolICU.h"
	#include <wtf/Atomics.h>
	#include <wtf/text/WTFString.h>

	using namespace WTF;
	using namespace std;

	namespace WebCore {

	// Binds |string| (|length| UTF-16 code units) to a lazily opened ICU break iterator of
	// kind |type| and returns it. Returns 0 when |string| is null, when no iterator is
	// available, or when ubrk_setText() reports a failure.
	// |createdIterator| and |iterator| are caller-owned state: the iterator is opened only
	// while |createdIterator| is false, and the flag is set even if the open fails.
	static TextBreakIterator* setUpIterator(bool& createdIterator, TextBreakIterator*& iterator,
	    UBreakIteratorType type, const UChar* string, int length)
	{
	    if (!string)
	        return 0;

	    if (!createdIterator) {
	        UErrorCode status = U_ZERO_ERROR;
	        UBreakIterator* opened = ubrk_open(type, currentTextBreakLocaleID(), 0, 0, &status);
	        iterator = reinterpret_cast<TextBreakIterator*>(opened);
	        createdIterator = true;
	        ASSERT_WITH_MESSAGE(U_SUCCESS(status), "ICU could not open a break iterator: %s (%d)", u_errorName(status), status);
	    }

	    if (!iterator)
	        return 0;

	    UErrorCode status = U_ZERO_ERROR;
	    ubrk_setText(reinterpret_cast<UBreakIterator*>(iterator), string, length, &status);
	    return U_FAILURE(status) ? 0 : iterator;
	}

	// Number of characters the Latin-1 UText provider exposes per chunk. The buffers sized
	// from this constant reserve one additional UChar.
	static const int s_UTextCharacterBufferSize = 16;

	// A UText bundled with the storage for its chunk buffer.
	typedef struct {
	    UText uTextStruct;
	    UChar uCharBuffer[s_UTextCharacterBufferSize + 1];
	} UTextWithBuffer;

	// Pristine UText value copied into a UTextWithBuffer before it is set up.
	static UText emptyUText = UTEXT_INITIALIZER;

	// Callbacks of the Latin-1 UText provider; definitions follow the function table.
	// Fix: the pointer declarators that were missing from the Clone and Extract prototypes
	// are restored so that they match the definitions and the function table's slot types.
	static UText* uTextLatin1Clone(UText*, const UText*, UBool, UErrorCode*);
	static int64_t uTextLatin1NativeLength(UText*);
	static UBool uTextLatin1Access(UText*, int64_t, UBool);
	static int32_t uTextLatin1Extract(UText*, int64_t, int64_t, UChar*, int32_t, UErrorCode*);
	static int64_t uTextLatin1MapOffsetToNative(const UText*);
	static int32_t uTextLatin1MapNativeIndexToUTF16(const UText*, int64_t);
	static void uTextLatin1Close(UText*);

	// Function table installed as UText::pFuncs for Latin-1 (LChar) backed text.
	// The initializer is positional, so the order of entries must not change.
	static struct UTextFuncs uTextLatin1Funcs = {
	    sizeof(UTextFuncs),
	    0, 0, 0,
	    uTextLatin1Clone,
	    uTextLatin1NativeLength,
	    uTextLatin1Access,
	    uTextLatin1Extract,
	    0,
	    0,
	    uTextLatin1MapOffsetToNative,
	    uTextLatin1MapNativeIndexToUTF16,
	    uTextLatin1Close,
	    0, 0, 0
	};

	// Clone callback of the Latin-1 UText provider. Only shallow clones are expected
	// (asserted): the clone shares |source|'s character data (context) and length (a) but
	// gets its own zeroed chunk buffer, positioned at |source|'s chunk start with nothing
	// loaded. Returns 0 if |*status| already holds a failure, and |destination| if
	// utext_setup() fails.
	//
	// Fix: restores the `*` tokens missing from `const_cast<UChar*>` and from the
	// `sizeof(UChar) * (...)` size expression, without which the memset call is ill-formed.
	static UText* uTextLatin1Clone(UText* destination, const UText* source, UBool deep, UErrorCode* status)
	{
	    ASSERT_UNUSED(deep, !deep);

	    if (U_FAILURE(*status))
	        return 0;

	    UText* result = utext_setup(destination, sizeof(UChar) * (s_UTextCharacterBufferSize + 1), status);
	    if (U_FAILURE(*status))
	        return destination;

	    result->providerProperties = source->providerProperties;

	    /* Point at the same position, but with an empty buffer */
	    result->chunkNativeStart = source->chunkNativeStart;
	    result->chunkNativeLimit = source->chunkNativeStart;
	    result->nativeIndexingLimit = static_cast<int32_t>(source->chunkNativeStart);
	    result->chunkOffset = 0;
	    result->context = source->context;
	    result->a = source->a;
	    result->pFuncs = &uTextLatin1Funcs;
	    result->chunkContents = (UChar*)result->pExtra;
	    memset(const_cast<UChar*>(result->chunkContents), 0, sizeof(UChar) * (s_UTextCharacterBufferSize + 1));

	    return result;
	}

	// Native-length callback: the text length is kept in the UText's |a| field.
	static int64_t uTextLatin1NativeLength(UText* uText)
	{
	    const int64_t storedLength = uText->a;
	    return storedLength;
	}

	// Access callback of the Latin-1 UText provider: makes the chunk buffer cover |index|.
	//
	// For forward access the chunk must contain the character at |index|; for backward
	// access it must contain the character before |index|. If the current chunk already
	// does, only chunkOffset is updated. Otherwise up to s_UTextCharacterBufferSize Latin-1
	// characters are widened from the backing string (uText->context) into the chunk buffer.
	//
	// Returns FALSE when asked to move forward past the end while the chunk already ends at
	// the text end, or backward from index 0 while the chunk starts at 0; TRUE otherwise.
	//
	// Fixes:
	//  - restores the `*` missing from `const_cast<UChar*>` / `static_cast<const LChar*>`;
	//  - after a backward reload, chunkOffset was set from the previous chunk's length
	//    because chunkLength was recomputed only afterwards;
	//  - an index outside [0, length] produced a negative chunkLength that was passed to
	//    copyChars() as a huge unsigned count; the index is now pinned to the text bounds.
	static UBool uTextLatin1Access(UText* uText, int64_t index, UBool forward)
	{
	    int64_t length = uText->a;

	    if (forward) {
	        if (index < uText->chunkNativeLimit && index >= uText->chunkNativeStart) {
	            /* Already inside the buffer. Set the new offset. */
	            uText->chunkOffset = static_cast<int32_t>(index - uText->chunkNativeStart);
	            return TRUE;
	        }
	        if (index >= length && uText->chunkNativeLimit == length) {
	            /* Off the end of the buffer, but we can't get it. */
	            uText->chunkOffset = uText->chunkLength;
	            return FALSE;
	        }
	    } else {
	        if (index <= uText->chunkNativeLimit && index > uText->chunkNativeStart) {
	            /* Already inside the buffer. Set the new offset. */
	            uText->chunkOffset = static_cast<int32_t>(index - uText->chunkNativeStart);
	            return TRUE;
	        }
	        if (!index && !uText->chunkNativeStart) {
	            /* Already at the beginning; can't go any farther */
	            uText->chunkOffset = 0;
	            return FALSE;
	        }
	    }

	    /* Pin the requested index to the text so the chunk bounds below stay ordered. */
	    if (index < 0)
	        index = 0;
	    else if (index > length)
	        index = length;

	    if (forward) {
	        /* The new chunk starts at the requested index. */
	        uText->chunkNativeStart = index;
	        uText->chunkNativeLimit = index + s_UTextCharacterBufferSize;
	        if (uText->chunkNativeLimit > length)
	            uText->chunkNativeLimit = length;
	    } else {
	        /* The new chunk ends at the requested index. */
	        uText->chunkNativeLimit = index;
	        uText->chunkNativeStart = index - s_UTextCharacterBufferSize;
	        if (uText->chunkNativeStart < 0)
	            uText->chunkNativeStart = 0;
	    }

	    /* Compute the new length first: the backward offset depends on it. */
	    uText->chunkLength = static_cast<int32_t>(uText->chunkNativeLimit - uText->chunkNativeStart);
	    uText->chunkOffset = forward ? 0 : uText->chunkLength;

	    StringImpl::copyChars(const_cast<UChar*>(uText->chunkContents), static_cast<const LChar*>(uText->context) + uText->chunkNativeStart, static_cast<unsigned>(uText->chunkLength));

	    uText->nativeIndexingLimit = uText->chunkLength;

	    return TRUE;
	}

	// Extract callback of the Latin-1 UText provider: copies the characters in the native
	// range [start, limit) into |dest| as UChars.
	//
	// |start| and |limit| are pinned to the text length. At most |destCapacity| characters
	// are copied. The return value is the full length of the pinned range, which may exceed
	// |destCapacity|. |*status| reports illegal arguments, out-of-bounds ranges, a missing
	// terminator (range exactly fills |dest|) or overflow (range does not fit).
	//
	// Fixes:
	//  - replaces the `\|\|` escape residue with the `||` operator;
	//  - the copy used to be guarded by `destCapacity > 0 && !dest`, a condition the
	//    argument check had already rejected, so no characters were ever copied.
	static int32_t uTextLatin1Extract(UText* uText, int64_t start, int64_t limit, UChar* dest, int32_t destCapacity, UErrorCode* status)
	{
	    int64_t length = uText->a;
	    if (U_FAILURE(*status))
	        return 0;

	    if (destCapacity < 0 || (!dest && destCapacity > 0)) {
	        *status = U_ILLEGAL_ARGUMENT_ERROR;
	        return 0;
	    }

	    if (start < 0 || start > limit || (limit - start) > INT32_MAX) {
	        *status = U_INDEX_OUTOFBOUNDS_ERROR;
	        return 0;
	    }

	    if (start > length)
	        start = length;
	    if (limit > length)
	        limit = length;

	    length = limit - start;

	    if (!length)
	        return 0;

	    // |dest| is known to be non-null whenever |destCapacity| is positive (checked above).
	    if (destCapacity > 0 && dest) {
	        int32_t trimmedLength = static_cast<int32_t>(length);
	        if (trimmedLength > destCapacity)
	            trimmedLength = destCapacity;

	        StringImpl::copyChars(dest, static_cast<const LChar*>(uText->context) + start, static_cast<unsigned>(trimmedLength));
	    }

	    if (length < destCapacity) {
	        // Room for a terminator: write it and clear a stale "not terminated" warning.
	        dest[length] = 0;
	        if (*status == U_STRING_NOT_TERMINATED_WARNING)
	            *status = U_ZERO_ERROR;
	    } else if (length == destCapacity)
	        *status = U_STRING_NOT_TERMINATED_WARNING;
	    else
	        *status = U_BUFFER_OVERFLOW_ERROR;

	    return static_cast<int32_t>(length);
	}

	// Converts the current chunk-relative offset into a native index by adding the native
	// index at which the chunk starts.
	static int64_t uTextLatin1MapOffsetToNative(const UText* uText)
	{
	    const int64_t chunkStart = uText->chunkNativeStart;
	    return chunkStart + uText->chunkOffset;
	}

	// Converts a native index that lies inside the current chunk into an offset relative to
	// the chunk start; this is the inverse of uTextLatin1MapOffsetToNative().
	//
	// Fixes: the lower-bound assertion was inverted (it required the index to lie at or
	// before the chunk start), and the absolute native index was returned instead of the
	// chunk-relative offset that ICU assigns to chunkOffset.
	static int32_t uTextLatin1MapNativeIndexToUTF16(const UText* uText, int64_t nativeIndex)
	{
	    ASSERT_UNUSED(uText, uText->chunkNativeStart <= nativeIndex);
	    ASSERT_UNUSED(uText, nativeIndex <= uText->chunkNativeLimit);
	    return static_cast<int32_t>(nativeIndex - uText->chunkNativeStart);
	}

	// Close callback of the Latin-1 UText provider: drops the reference to the backing
	// character data. Nothing is freed here.
	static void uTextLatin1Close(UText* uText)
	{
	uText->context = 0;
	}

	// Initializes |uTextLatin1| as a UText over the |length| Latin-1 characters at |string|,
	// using the Latin-1 provider function table. The chunk buffer is placed in the UText's
	// extra space and zeroed. Returns the set-up UText, or 0 if utext_setup() fails
	// (|*errorCode| then holds the failure).
	//
	// Fix: restores the `*` tokens missing from `reinterpret_cast<UText*>`,
	// `const_cast<UChar*>` and the two `sizeof(UChar) * (...)` size expressions.
	static UText* UTextOpenLatin1(UTextWithBuffer* uTextLatin1, const LChar* string, unsigned length, UErrorCode* errorCode)
	{
	    UText* result = utext_setup(reinterpret_cast<UText*>(uTextLatin1), sizeof(UChar) * (s_UTextCharacterBufferSize + 1), errorCode);

	    if (!U_SUCCESS(*errorCode))
	        return 0;

	    result->context = string;
	    result->a = (int64_t)length;
	    result->pFuncs = &uTextLatin1Funcs;
	    result->chunkContents = (UChar*)result->pExtra;
	    memset(const_cast<UChar*>(result->chunkContents), 0, sizeof(UChar) * (s_UTextCharacterBufferSize + 1));

	    return result;
	}

	// Returns the function-static word break iterator, pointed at |string|, or 0 on failure
	// (see setUpIterator()). The same iterator object is handed out on every call.
	TextBreakIterator* wordBreakIterator(const UChar* string, int length)
	{
	    static bool iteratorCreated = false;
	    static TextBreakIterator* sharedIterator;
	    return setUpIterator(iteratorCreated, sharedIterator, UBRK_WORD, string, length);
	}

	// Takes a line break iterator for |locale| from the shared pool and points it at the
	// |length| Latin-1 characters in |string| through a stack-allocated Latin-1 UText.
	// Returns 0 if the pool has no iterator or if ICU reports a failure. A successful
	// result must be handed back with releaseLineBreakIterator().
	//
	// Fix: on the failure paths the iterator taken from the pool is now put back (callers
	// receive 0 and therefore cannot release it), and the UText is closed even when
	// ubrk_setUText() fails.
	TextBreakIterator* acquireLineBreakIterator(const LChar* string, int length, const AtomicString& locale)
	{
	    UBreakIterator* iterator = LineBreakIteratorPool::sharedPool().take(locale);
	    if (!iterator)
	        return 0;

	    // The UText and its chunk buffer live on the stack; pExtra/extraSize tell
	    // utext_setup() about the buffer.
	    UTextWithBuffer uTextLatin1Local;
	    uTextLatin1Local.uTextStruct = emptyUText;
	    uTextLatin1Local.uTextStruct.extraSize = sizeof(uTextLatin1Local.uCharBuffer);
	    uTextLatin1Local.uTextStruct.pExtra = uTextLatin1Local.uCharBuffer;

	    UErrorCode uTextOpenStatus = U_ZERO_ERROR;
	    UText* uTextLatin1 = UTextOpenLatin1(&uTextLatin1Local, string, length, &uTextOpenStatus);
	    if (U_FAILURE(uTextOpenStatus)) {
	        LOG_ERROR("UTextOpenLatin1 failed with status %d", uTextOpenStatus);
	        LineBreakIteratorPool::sharedPool().put(iterator);
	        return 0;
	    }

	    UErrorCode setTextStatus = U_ZERO_ERROR;
	    ubrk_setUText(iterator, uTextLatin1, &setTextStatus);

	    // The local UText is no longer needed once ubrk_setUText() has returned, whether or
	    // not it succeeded.
	    utext_close(uTextLatin1);

	    if (U_FAILURE(setTextStatus)) {
	        LOG_ERROR("ubrk_setUText failed with status %d", setTextStatus);
	        LineBreakIteratorPool::sharedPool().put(iterator);
	        return 0;
	    }

	    return reinterpret_cast<TextBreakIterator*>(iterator);
	}

	// Takes a line break iterator for |locale| from the shared pool and points it at the
	// |length| UTF-16 code units in |string|. Returns 0 if the pool has no iterator or if
	// ubrk_setText() fails. A successful result must be handed back with
	// releaseLineBreakIterator().
	//
	// Fix: when ubrk_setText() fails the iterator is now put back into the pool; callers
	// receive 0 and therefore cannot release it themselves.
	TextBreakIterator* acquireLineBreakIterator(const UChar* string, int length, const AtomicString& locale)
	{
	    UBreakIterator* iterator = LineBreakIteratorPool::sharedPool().take(locale);
	    if (!iterator)
	        return 0;

	    UErrorCode setTextStatus = U_ZERO_ERROR;
	    ubrk_setText(iterator, string, length, &setTextStatus);
	    if (U_FAILURE(setTextStatus)) {
	        LOG_ERROR("ubrk_setText failed with status %d", setTextStatus);
	        LineBreakIteratorPool::sharedPool().put(iterator);
	        return 0;
	    }

	    return reinterpret_cast<TextBreakIterator*>(iterator);
	}

	// Hands an iterator obtained from acquireLineBreakIterator() back to the shared pool.
	// |iterator| must not be null (asserted).
	void releaseLineBreakIterator(TextBreakIterator* iterator)
	{
	    ASSERT_ARG(iterator, iterator);

	    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
	    LineBreakIteratorPool::sharedPool().put(icuIterator);
	}

	// File-local slot holding at most one idle character break iterator that
	// NonSharedCharacterBreakIterator instances hand over to each other.
	static TextBreakIterator* nonSharedCharacterBreakIterator;

	// Replaces the slot's value with |newValue| if it currently equals |expected| and
	// reports whether the replacement happened. Uses weakCompareAndSwap() when
	// ENABLE(COMPARE_AND_SWAP) is set, and a mutex-protected compare-then-store otherwise.
	static inline bool compareAndSwapNonSharedCharacterBreakIterator(TextBreakIterator* expected, TextBreakIterator* newValue)
	{
	#if ENABLE(COMPARE_AND_SWAP)
	    void** slot = reinterpret_cast<void**>(&nonSharedCharacterBreakIterator);
	    return weakCompareAndSwap(slot, expected, newValue);
	#else
	    DEFINE_STATIC_LOCAL(Mutex, nonSharedCharacterBreakIteratorMutex, ());
	    MutexLocker locker(nonSharedCharacterBreakIteratorMutex);
	    if (nonSharedCharacterBreakIterator == expected) {
	        nonSharedCharacterBreakIterator = newValue;
	        return true;
	    }
	    return false;
	#endif
	}

	// Tries to claim the idle iterator from the file-local slot (by swapping the slot to 0).
	// If there was none, or the swap did not succeed, setUpIterator() opens a new character
	// break iterator. m_iterator ends up 0 when setUpIterator() fails.
	NonSharedCharacterBreakIterator::NonSharedCharacterBreakIterator(const UChar* buffer, int length)
	{
	    m_iterator = nonSharedCharacterBreakIterator;

	    bool claimedIdleIterator = false;
	    if (m_iterator)
	        claimedIdleIterator = compareAndSwapNonSharedCharacterBreakIterator(m_iterator, 0);

	    m_iterator = setUpIterator(claimedIdleIterator, m_iterator, UBRK_CHARACTER, buffer, length);
	}

	// Parks the iterator in the file-local slot if the slot is empty; otherwise closes it.
	NonSharedCharacterBreakIterator::~NonSharedCharacterBreakIterator()
	{
	    const bool parkedForReuse = compareAndSwapNonSharedCharacterBreakIterator(0, m_iterator);
	    if (parkedForReuse)
	        return;

	    ubrk_close(reinterpret_cast<UBreakIterator*>(m_iterator));
	}

	// Returns the function-static sentence break iterator, pointed at |string|, or 0 on
	// failure (see setUpIterator()). The same iterator object is handed out on every call.
	TextBreakIterator* sentenceBreakIterator(const UChar* string, int length)
	{
	    static bool iteratorCreated = false;
	    static TextBreakIterator* sharedIterator;
	    return setUpIterator(iteratorCreated, sharedIterator, UBRK_SENTENCE, string, length);
	}

	// Forwards to ubrk_first() on the underlying ICU iterator and returns its result.
	int textBreakFirst(TextBreakIterator* iterator)
	{
	    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
	    return ubrk_first(icuIterator);
	}

	// Forwards to ubrk_last() on the underlying ICU iterator and returns its result.
	int textBreakLast(TextBreakIterator* iterator)
	{
	    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
	    return ubrk_last(icuIterator);
	}

	// Forwards to ubrk_next() on the underlying ICU iterator and returns its result.
	int textBreakNext(TextBreakIterator* iterator)
	{
	    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
	    return ubrk_next(icuIterator);
	}

	// Forwards to ubrk_previous() on the underlying ICU iterator and returns its result.
	int textBreakPrevious(TextBreakIterator* iterator)
	{
	    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
	    return ubrk_previous(icuIterator);
	}

	// Forwards to ubrk_preceding() with |pos| on the underlying ICU iterator and returns
	// its result.
	int textBreakPreceding(TextBreakIterator* iterator, int pos)
	{
	    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
	    return ubrk_preceding(icuIterator, pos);
	}

	// Forwards to ubrk_following() with |pos| on the underlying ICU iterator and returns
	// its result.
	int textBreakFollowing(TextBreakIterator* iterator, int pos)
	{
	    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
	    return ubrk_following(icuIterator, pos);
	}

	// Forwards to ubrk_current() on the underlying ICU iterator and returns its result.
	int textBreakCurrent(TextBreakIterator* iterator)
	{
	    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
	    return ubrk_current(icuIterator);
	}

	// Forwards to ICU's ubrk_isBoundary() for |position| and returns its result as a bool.
	bool isTextBreak(TextBreakIterator* iterator, int position)
	{
	    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
	    return ubrk_isBoundary(icuIterator, position);
	}

	// Returns true unless the iterator's current rule status is UBRK_WORD_NONE.
	bool isWordTextBreak(TextBreakIterator* iterator)
	{
	    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
	    return ubrk_getRuleStatus(icuIterator) != UBRK_WORD_NONE;
	}

	// Opens a rule-based ICU break iterator from |breakRules| the first time it is
	// called for a given |createdIterator| / |iterator| pair, then points the iterator
	// at |string|. Returns 0 if |string| is null, if no iterator is available, or if
	// ubrk_setText() reports a failure; otherwise returns |iterator|.
	static TextBreakIterator* setUpIteratorWithRules(bool& createdIterator, TextBreakIterator*& iterator,
	    const char* breakRules, const UChar* string, int length)
	{
	    if (!string)
	        return 0;

	    if (!createdIterator) {
	        String ruleText(breakRules);
	        UParseError parseError;
	        UErrorCode openStatus = U_ZERO_ERROR;
	        UBreakIterator* opened = ubrk_openRules(ruleText.characters(), ruleText.length(), 0, 0, &parseError, &openStatus);
	        iterator = reinterpret_cast<TextBreakIterator*>(opened);
	        // The flag is set even when opening failed, so opening is attempted only once.
	        createdIterator = true;
	        ASSERT_WITH_MESSAGE(U_SUCCESS(openStatus), "ICU could not open a break iterator: %s (%d)", u_errorName(openStatus), openStatus);
	    }
	    if (!iterator)
	        return 0;

	    UErrorCode textStatus = U_ZERO_ERROR;
	    ubrk_setText(reinterpret_cast<UBreakIterator*>(iterator), string, length, &textStatus);
	    return U_FAILURE(textStatus) ? 0 : iterator;
	}

	// Returns the function-static rule-based break iterator used for cursor movement,
	// set up for |string|, or whatever setUpIteratorWithRules returns on failure.
	TextBreakIterator* cursorMovementIterator(const UChar* string, int length)
	{
	    // This rule set is based on character-break iterator rules of ICU 4.0
	    // <http://source.icu-project.org/repos/icu/icu/tags/release-4-0/source/data/brkitr/char.txt>.
	    // The major differences from the original ones are listed below:
	    // * Replaced '[\p{Grapheme_Cluster_Break = SpacingMark}]' with '[\p{General_Category = Spacing Mark} - $Extend]' for ICU 3.8 or earlier;
	    // * Removed rules that prevent a cursor from moving after prepend characters (Bug 24342);
	    // * Added rules that prevent a cursor from moving after virama signs of Indic languages except Tamil (Bug 15790);
	    // * Added rules that prevent a cursor from moving before Japanese half-width katakana voiced marks, and;
	    // * Added rules for regional indicator symbols.
	    //
	    // Alternation inside the rules is written as a plain '|'; "\|" is not a valid
	    // C++ escape sequence and must not be used in these literals.
	    static const char* kRules =
	        "$CR = [\\p{Grapheme_Cluster_Break = CR}];"
	        "$LF = [\\p{Grapheme_Cluster_Break = LF}];"
	        "$Control = [\\p{Grapheme_Cluster_Break = Control}];"
	        "$VoiceMarks = [\\uFF9E\\uFF9F];" // Japanese half-width katakana voiced marks
	        "$Extend = [\\p{Grapheme_Cluster_Break = Extend} $VoiceMarks - [\\u0E30 \\u0E32 \\u0E45 \\u0EB0 \\u0EB2]];"
	        "$SpacingMark = [[\\p{General_Category = Spacing Mark}] - $Extend];"
	        "$L = [\\p{Grapheme_Cluster_Break = L}];"
	        "$V = [\\p{Grapheme_Cluster_Break = V}];"
	        "$T = [\\p{Grapheme_Cluster_Break = T}];"
	        "$LV = [\\p{Grapheme_Cluster_Break = LV}];"
	        "$LVT = [\\p{Grapheme_Cluster_Break = LVT}];"
	        "$Hin0 = [\\u0905-\\u0939];" // Devanagari Letter A,...,Ha
	        "$HinV = \\u094D;" // Devanagari Sign Virama
	        "$Hin1 = [\\u0915-\\u0939];" // Devanagari Letter Ka,...,Ha
	        "$Ben0 = [\\u0985-\\u09B9];" // Bengali Letter A,...,Ha
	        "$BenV = \\u09CD;" // Bengali Sign Virama
	        "$Ben1 = [\\u0995-\\u09B9];" // Bengali Letter Ka,...,Ha
	        "$Pan0 = [\\u0A05-\\u0A39];" // Gurmukhi Letter A,...,Ha
	        "$PanV = \\u0A4D;" // Gurmukhi Sign Virama
	        "$Pan1 = [\\u0A15-\\u0A39];" // Gurmukhi Letter Ka,...,Ha
	        "$Guj0 = [\\u0A85-\\u0AB9];" // Gujarati Letter A,...,Ha
	        "$GujV = \\u0ACD;" // Gujarati Sign Virama
	        "$Guj1 = [\\u0A95-\\u0AB9];" // Gujarati Letter Ka,...,Ha
	        "$Ori0 = [\\u0B05-\\u0B39];" // Oriya Letter A,...,Ha
	        "$OriV = \\u0B4D;" // Oriya Sign Virama
	        "$Ori1 = [\\u0B15-\\u0B39];" // Oriya Letter Ka,...,Ha
	        "$Tel0 = [\\u0C05-\\u0C39];" // Telugu Letter A,...,Ha
	        "$TelV = \\u0C4D;" // Telugu Sign Virama
	        // NOTE(review): U+0C14 is TELUGU LETTER AU; Ka is U+0C15. The range is left
	        // unchanged here -- confirm whether starting at U+0C14 is intentional.
	        "$Tel1 = [\\u0C14-\\u0C39];" // Telugu Letter Ka,...,Ha
	        "$Kan0 = [\\u0C85-\\u0CB9];" // Kannada Letter A,...,Ha
	        "$KanV = \\u0CCD;" // Kannada Sign Virama
	        "$Kan1 = [\\u0C95-\\u0CB9];" // Kannada Letter Ka,...,Ha
	        "$Mal0 = [\\u0D05-\\u0D39];" // Malayalam Letter A,...,Ha
	        "$MalV = \\u0D4D;" // Malayalam Sign Virama
	        "$Mal1 = [\\u0D15-\\u0D39];" // Malayalam Letter Ka,...,Ha
	        "$RI = [\\U0001F1E6-\\U0001F1FF];" // Emoji regional indicators
	        "!!chain;"
	        "!!forward;"
	        "$CR $LF;"
	        "$L ($L | $V | $LV | $LVT);"
	        "($LV | $V) ($V | $T);"
	        "($LVT | $T) $T;"
	        "[^$Control $CR $LF] $Extend;"
	        "[^$Control $CR $LF] $SpacingMark;"
	        "$RI $RI / $RI;"
	        "$RI $RI;"
	        "$Hin0 $HinV $Hin1;" // Devanagari Virama (forward)
	        "$Ben0 $BenV $Ben1;" // Bengali Virama (forward)
	        "$Pan0 $PanV $Pan1;" // Gurmukhi Virama (forward)
	        "$Guj0 $GujV $Guj1;" // Gujarati Virama (forward)
	        "$Ori0 $OriV $Ori1;" // Oriya Virama (forward)
	        "$Tel0 $TelV $Tel1;" // Telugu Virama (forward)
	        "$Kan0 $KanV $Kan1;" // Kannada Virama (forward)
	        "$Mal0 $MalV $Mal1;" // Malayalam Virama (forward)
	        "!!reverse;"
	        "$LF $CR;"
	        "($L | $V | $LV | $LVT) $L;"
	        "($V | $T) ($LV | $V);"
	        "$T ($LVT | $T);"
	        "$Extend [^$Control $CR $LF];"
	        "$SpacingMark [^$Control $CR $LF];"
	        "$RI $RI / $RI $RI;"
	        "$RI $RI;"
	        "$Hin1 $HinV $Hin0;" // Devanagari Virama (backward)
	        "$Ben1 $BenV $Ben0;" // Bengali Virama (backward)
	        "$Pan1 $PanV $Pan0;" // Gurmukhi Virama (backward)
	        "$Guj1 $GujV $Guj0;" // Gujarati Virama (backward)
	        "$Ori1 $OriV $Ori0;" // Oriya Virama (backward)
	        "$Tel1 $TelV $Tel0;" // Telugu Virama (backward)
	        "$Kan1 $KanV $Kan0;" // Kannada Virama (backward)
	        "$Mal1 $MalV $Mal0;" // Malayalam Virama (backward)
	        "!!safe_reverse;"
	        "!!safe_forward;";
	    // Both statics persist across calls; setUpIteratorWithRules updates them by
	    // reference so the rules are only compiled on the first call that gets that far.
	    static bool createdCursorMovementIterator = false;
	    static TextBreakIterator* staticCursorMovementIterator;
	    return setUpIteratorWithRules(createdCursorMovementIterator, staticCursorMovementIterator, kRules, string, length);
	}

	}