Source/WebCore/platform/text/gtk/TextBreakIteratorGtk.cpp - platform/external/chromium_org/third_party/WebKit - Git at Google

 /*
  * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
  * Copyright (C) 2007 Apple Inc. All rights reserved.
  * Copyright (C) 2008 Jürg Billeter <j@bitron.ch>
  * Copyright (C) 2008 Dominik Röttsches <dominik.roettsches@access-company.com>
  * Copyright (C) 2010 Igalia S.L.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Library General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This library is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Library General Public License for more details.
  *
  * You should have received a copy of the GNU Library General Public License
  * along with this library; see the file COPYING.LIB.  If not, write to
  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  * Boston, MA 02110-1301, USA.
  *
  */

 #include "config.h"
 #include "TextBreakIterator.h"

 #include <wtf/Atomics.h>
 #include <wtf/gobject/GOwnPtr.h>
 #include <pango/pango.h>

 using namespace WTF;
 using namespace std;

 #define UTF8_IS_SURROGATE(character) (character >= 0x10000 && character <= 0x10FFFF)

 namespace WebCore {

 // Holds a UTF-8 copy of a UTF-16 string and a cursor that is tracked both as
 // a code point index into the UTF-8 text and as a UTF-16 code unit offset.
 class CharacterIterator {
 public:
     // Start from a well-defined empty state; setText() installs real values.
     CharacterIterator()
         : m_length(0)
         , m_size(0)
         , m_index(0)
         , m_utf16Index(0)
         , m_utf16Length(0)
     {
     }

     // Converts |string| to UTF-8 and rewinds the cursor. Returns false when
     // the conversion produced no bytes.
     bool setText(const UChar* string, int length);
     const gchar* getText() const { return m_utf8.get(); }
     // Number of code points in the UTF-8 text.
     int getLength() const { return m_length; }
     // Number of bytes in the UTF-8 text.
     glong getSize() const { return m_size; }
     void setIndex(int index);
     int getIndex() const { return m_index; }
     void setUTF16Index(int index);
     int getUTF16Index() const { return m_utf16Index; }
     int getUTF16Length() const { return m_utf16Length; }
     int first();
     int last();
     int next();
     int previous();
 private:
     // UTF-16 code units (0, 1 or 2) taken by the code point at |index|.
     int characterSize(int index);

     GOwnPtr<char> m_utf8;
     int m_length; // Code points in m_utf8.
     long m_size; // Bytes in m_utf8.
     int m_index; // Cursor, in code points.
     int m_utf16Index; // Cursor, in UTF-16 code units.
     int m_utf16Length; // Length of the source string in UTF-16 code units.
 };

 int CharacterIterator::characterSize(int index)
 {
     if (index == m_length || index < 0)
         return 0;
     if (m_length == m_utf16Length)
         return 1;

     gchar* indexPtr = g_utf8_offset_to_pointer(m_utf8.get(), index);
     gunichar character = g_utf8_get_char(indexPtr);
     return UTF8_IS_SURROGATE(character) ? 2 : 1;
 }

 bool CharacterIterator::setText(const UChar* string, int length)
 {
     long utf8Size = 0;
     m_utf8.set(g_utf16_to_utf8(string, length, 0, &utf8Size, 0));
     if (!utf8Size)
         return false;

     m_utf16Length = length;
     m_length = g_utf8_strlen(m_utf8.get(), utf8Size);
     m_size = utf8Size;
     m_index = 0;
     m_utf16Index = 0;

     return true;
 }

 void CharacterIterator::setIndex(int index)
 {
     if (index == m_index)
         return;
     if (index <= 0)
         m_index = m_utf16Index = 0;
     else if (index >= m_length) {
         m_index = m_length;
         m_utf16Index = m_utf16Length;
     } else if (m_length == m_utf16Length)
         m_index = m_utf16Index = index;
     else {
         m_index = index;
         int utf16Index = 0;
         int utf8Index = 0;
         while (utf8Index < index) {
             utf16Index += characterSize(utf8Index);
             utf8Index++;
         }
         m_utf16Index = utf16Index;
     }
 }

 void CharacterIterator::setUTF16Index(int index)
 {
     if (index == m_utf16Index)
         return;
     if (index <= 0)
         m_utf16Index = m_index = 0;
     else if (index >= m_utf16Length) {
         m_utf16Index = m_utf16Length;
         m_index = m_length;
     } else if (m_length == m_utf16Length)
         m_utf16Index = m_index = index;
     else {
         m_utf16Index = index;
         int utf16Index = 0;
         int utf8Index = 0;
         while (utf16Index < index) {
             utf16Index += characterSize(utf8Index);
             utf8Index++;
         }
         m_index = utf8Index;
     }
 }

 // Rewinds both cursors to the start of the text and returns the new code
 // point position.
 int CharacterIterator::first()
 {
     m_utf16Index = 0;
     m_index = 0;
     return m_index;
 }

 // Places both cursors one past the final character and returns the new code
 // point position (m_length).
 int CharacterIterator::last()
 {
     m_utf16Index = m_utf16Length;
     m_index = m_length;
     return m_index;
 }

 // Advances the cursor by one code point. Returns the new code point position,
 // or TextBreakDone (also stored in both cursors) when already at the end.
 int CharacterIterator::next()
 {
     const int following = m_index + 1;

     if (following > m_length) {
         m_index = TextBreakDone;
         m_utf16Index = TextBreakDone;
         return m_index;
     }

     // Step the UTF-16 offset by the width of the code point being left,
     // never going beyond the end of the UTF-16 text.
     const int advanced = m_utf16Index + characterSize(m_index);
     m_utf16Index = min(advanced, m_utf16Length);
     m_index = following;
     return m_index;
 }

 // Moves the cursor back by one code point. Returns the new code point
 // position, or TextBreakDone (also stored in both cursors) when already at
 // the start.
 int CharacterIterator::previous()
 {
     const int preceding = m_index - 1;

     if (preceding < 0) {
         m_index = TextBreakDone;
         m_utf16Index = TextBreakDone;
         return m_index;
     }

     // Step the UTF-16 offset back by the width of the code point being
     // entered, never going below zero.
     const int retreated = m_utf16Index - characterSize(preceding);
     m_utf16Index = max(retreated, 0);
     m_index = preceding;
     return m_index;
 }

 // Kind of boundary an iterator searches for; selects which PangoLogAttr flag
 // textBreakNext()/textBreakPrevious() test.
 enum UBreakIteratorType {
     UBRK_CHARACTER = 0,
     UBRK_WORD = 1,
     UBRK_LINE = 2,
     UBRK_SENTENCE = 3
 };

 class TextBreakIterator {
 public:
     UBreakIteratorType m_type;
     PangoLogAttr* m_logAttrs;
     CharacterIterator m_charIterator;
 };

 // Points |iterator| at |string| and recomputes its Pango attributes.
 // When |createdIterator| is false a new TextBreakIterator is allocated into
 // |iterator| and the flag is set. Returns the iterator, or 0 when |string| is
 // null, |iterator| is null, or the text could not be set.
 static TextBreakIterator* setUpIterator(bool& createdIterator, TextBreakIterator*& iterator,
     UBreakIteratorType type, const UChar* string, int length)
 {
     if (!string)
         return 0;

     if (!createdIterator) {
         iterator = new TextBreakIterator();
         createdIterator = true;
     }
     if (!iterator)
         return 0;

     CharacterIterator& characters = iterator->m_charIterator;
     if (!characters.setText(string, length))
         return 0;

     iterator->m_type = type;

     // One attribute per code point plus one for the position after the text.
     const int attributeCount = characters.getLength() + 1;

     // createdIterator is always true here, so the previous attribute array
     // (null for a brand-new iterator) is released unconditionally.
     g_free(iterator->m_logAttrs);
     iterator->m_logAttrs = g_new0(PangoLogAttr, attributeCount);
     pango_get_log_attrs(characters.getText(), characters.getSize(),
                         -1, 0, iterator->m_logAttrs, attributeCount);

     return iterator;
 }

 static TextBreakIterator* nonSharedCharacterBreakIterator;

 // Takes the cached character iterator when one is available (otherwise
 // setUpIterator() allocates a new one) and points it at |buffer|.
 // m_iterator ends up 0 when set-up fails.
 NonSharedCharacterBreakIterator::NonSharedCharacterBreakIterator(const UChar* buffer, int length)
 {
     TextBreakIterator* iterator = nonSharedCharacterBreakIterator;
     bool ownsIterator = iterator && weakCompareAndSwap(reinterpret_cast<void**>(&nonSharedCharacterBreakIterator), iterator, 0);
     // If the swap did not succeed the pointer is not ours; forget it so it
     // can never be released from here.
     if (!ownsIterator)
         iterator = 0;

     m_iterator = setUpIterator(ownsIterator, iterator, UBRK_CHARACTER, buffer, length);

     // On failure setUpIterator() returns 0 while |iterator| may still hold an
     // object we own (taken from the cache or freshly allocated). Previously
     // that object was overwritten and leaked; hand it back to the cache, or
     // delete it if the cache slot is occupied.
     if (!m_iterator && iterator
         && !weakCompareAndSwap(reinterpret_cast<void**>(&nonSharedCharacterBreakIterator), 0, iterator))
         delete iterator;
 }

 // Offers the iterator back to the file-level cache slot; deletes it when the
 // compare-and-swap into the slot does not succeed.
 NonSharedCharacterBreakIterator::~NonSharedCharacterBreakIterator()
 {
     void** cacheSlot = reinterpret_cast<void**>(&nonSharedCharacterBreakIterator);
     const bool returnedToCache = weakCompareAndSwap(cacheSlot, 0, m_iterator);
     if (returnedToCache)
         return;
     delete m_iterator;
 }

 // Returns a function-static character-type iterator re-targeted at |string|,
 // or 0 when set-up fails.
 TextBreakIterator* cursorMovementIterator(const UChar* string, int length)
 {
     // FIXME: This needs closer inspection to achieve behaviour identical to the ICU version.
     static bool iteratorExists = false;
     static TextBreakIterator* sharedIterator;
     TextBreakIterator* result = setUpIterator(iteratorExists, sharedIterator, UBRK_CHARACTER, string, length);
     return result;
 }

 // Returns a function-static word-type iterator re-targeted at |string|, or 0
 // when set-up fails.
 TextBreakIterator* wordBreakIterator(const UChar* string, int length)
 {
     static bool iteratorExists = false;
     static TextBreakIterator* sharedIterator;
     TextBreakIterator* result = setUpIterator(iteratorExists, sharedIterator, UBRK_WORD, string, length);
     return result;
 }

 static bool createdLineBreakIterator = false;
 static TextBreakIterator* staticLineBreakIterator;

 // Hands out a line-type iterator for |string|. The file-level cached iterator
 // is used (and removed from the cache) when it is available or has never been
 // created; otherwise a separate iterator is allocated. The caller gives it
 // back through releaseLineBreakIterator().
 TextBreakIterator* acquireLineBreakIterator(const UChar* string, int length, const AtomicString&)
 {
     TextBreakIterator* acquired = 0;

     const bool cacheUsable = !createdLineBreakIterator || staticLineBreakIterator;
     if (cacheUsable) {
         setUpIterator(createdLineBreakIterator, staticLineBreakIterator, UBRK_LINE, string, length);
         // Move the cached iterator out of the cache slot.
         acquired = staticLineBreakIterator;
         staticLineBreakIterator = 0;
     }

     if (acquired)
         return acquired;

     // Cache unavailable (or empty after set-up): build a standalone iterator.
     bool createdStandaloneIterator = false;
     setUpIterator(createdStandaloneIterator, acquired, UBRK_LINE, string, length);
     return acquired;
 }

 // Takes back an iterator obtained from acquireLineBreakIterator(): it refills
 // the cache slot when that is empty and is deleted otherwise.
 void releaseLineBreakIterator(TextBreakIterator* iterator)
 {
     ASSERT(createdLineBreakIterator);
     ASSERT(iterator);

     if (staticLineBreakIterator) {
         delete iterator;
         return;
     }
     staticLineBreakIterator = iterator;
 }

 // Returns a function-static sentence-type iterator re-targeted at |string|,
 // or 0 when set-up fails.
 TextBreakIterator* sentenceBreakIterator(const UChar* string, int length)
 {
     static bool iteratorExists = false;
     static TextBreakIterator* sharedIterator;
     TextBreakIterator* result = setUpIterator(iteratorExists, sharedIterator, UBRK_SENTENCE, string, length);
     return result;
 }

 // Rewinds the iterator to the start of its text and returns the resulting
 // UTF-16 offset.
 int textBreakFirst(TextBreakIterator* iterator)
 {
     CharacterIterator& characters = iterator->m_charIterator;
     characters.first();
     return characters.getUTF16Index();
 }

 // Positions the iterator near the end of its text and returns the UTF-16
 // offset of that position.
 int textBreakLast(TextBreakIterator* iterator)
 {
     // textBreakLast is not meant to find just any break according to
     // iterator->m_type but really the one near the last character
     // (compare the ICU documentation for ubrk_first and ubrk_last).
     // From the ICU docs for ubrk_last:
     // "Determine the index immediately beyond the last character in the text being scanned."

     // Walk backwards over the cursor positions in iterator->m_logAttrs. While
     // those positions are flagged as white space, remember the leftmost one;
     // stop at the first cursor position that is not white. If no white
     // position was seen, the result stays at the text length, i.e. the
     // position just beyond the last character.

     CharacterIterator& characters = iterator->m_charIterator;
     const PangoLogAttr* attributes = iterator->m_logAttrs;

     int nextWhiteSpacePos = characters.getLength();
     bool whiteSpaceAtTheEnd = true;

     // previous() yields TextBreakDone once the start has been passed; the
     // pos >= 0 test presumably relies on that sentinel being negative.
     for (int pos = characters.last(); pos >= 0 && whiteSpaceAtTheEnd; pos = characters.previous()) {
         if (!attributes[pos].is_cursor_position)
             continue;
         whiteSpaceAtTheEnd = attributes[pos].is_white;
         if (whiteSpaceAtTheEnd)
             nextWhiteSpacePos = pos;
     }

     characters.setIndex(nextWhiteSpacePos);
     return characters.getUTF16Index();
 }

 // Advances to the next position that is a boundary for the iterator's type
 // and returns its UTF-16 offset; returns the character iterator's
 // end-of-iteration UTF-16 value when no further boundary exists.
 int textBreakNext(TextBreakIterator* iterator)
 {
     CharacterIterator& characters = iterator->m_charIterator;

     while (characters.next() != TextBreakDone) {
         const PangoLogAttr& attributes = iterator->m_logAttrs[characters.getIndex()];

         // FIXME: UBRK_WORD case: Single multibyte characters (i.e. white space around them), such as the euro symbol €,
         // are not marked as word_start & word_end as opposed to the way ICU does it.
         // This leads to - for example - different word selection behaviour when right clicking.

         bool atBoundary = false;
         switch (iterator->m_type) {
         case UBRK_LINE:
             atBoundary = attributes.is_line_break;
             break;
         case UBRK_WORD:
             atBoundary = attributes.is_word_start || attributes.is_word_end;
             break;
         case UBRK_CHARACTER:
             atBoundary = attributes.is_cursor_position;
             break;
         case UBRK_SENTENCE:
             atBoundary = attributes.is_sentence_boundary;
             break;
         default:
             break;
         }

         if (atBoundary)
             break;
     }
     return characters.getUTF16Index();
 }

 // Steps back to the closest earlier position that is a boundary for the
 // iterator's type and returns its UTF-16 offset; returns the character
 // iterator's end-of-iteration UTF-16 value when no earlier boundary exists.
 int textBreakPrevious(TextBreakIterator* iterator)
 {
     CharacterIterator& characters = iterator->m_charIterator;

     while (characters.previous() != TextBreakDone) {
         const PangoLogAttr& attributes = iterator->m_logAttrs[characters.getIndex()];

         bool atBoundary = false;
         switch (iterator->m_type) {
         case UBRK_LINE:
             atBoundary = attributes.is_line_break;
             break;
         case UBRK_WORD:
             atBoundary = attributes.is_word_start || attributes.is_word_end;
             break;
         case UBRK_CHARACTER:
             atBoundary = attributes.is_cursor_position;
             break;
         case UBRK_SENTENCE:
             atBoundary = attributes.is_sentence_boundary;
             break;
         default:
             break;
         }

         if (atBoundary)
             break;
     }
     return characters.getUTF16Index();
 }

 // Returns the boundary before UTF-16 offset |offset|. Offsets beyond the text
 // yield TextBreakDone and negative offsets yield 0, in both cases without
 // moving the iterator.
 int textBreakPreceding(TextBreakIterator* iterator, int offset)
 {
     CharacterIterator& characters = iterator->m_charIterator;

     if (offset > characters.getUTF16Length())
         return TextBreakDone;
     if (offset < 0)
         return 0;

     characters.setUTF16Index(offset);
     return textBreakPrevious(iterator);
 }

 // Returns the boundary after UTF-16 offset |offset|. Offsets beyond the text
 // yield TextBreakDone and negative offsets yield 0, in both cases without
 // moving the iterator.
 int textBreakFollowing(TextBreakIterator* iterator, int offset)
 {
     CharacterIterator& characters = iterator->m_charIterator;

     if (offset > characters.getUTF16Length())
         return TextBreakDone;
     if (offset < 0)
         return 0;

     characters.setUTF16Index(offset);
     return textBreakNext(iterator);
 }

 // Returns the iterator's current position as a UTF-16 offset.
 int textBreakCurrent(TextBreakIterator* iterator)
 {
     const int currentOffset = iterator->m_charIterator.getUTF16Index();
     return currentOffset;
 }

 // Reports whether UTF-16 offset |offset| is a boundary for the iterator's
 // type. Offset 0 always is; offsets beyond the text never are. Moves the
 // iterator as a side effect.
 bool isTextBreak(TextBreakIterator* iterator, int offset)
 {
     if (offset == 0)
         return true;

     CharacterIterator& characters = iterator->m_charIterator;
     if (offset > characters.getUTF16Length())
         return false;

     characters.setUTF16Index(offset);
     const int targetIndex = characters.getIndex();

     // Step one character back and search forward: the search stops on the
     // target position exactly when that position is a boundary.
     characters.previous();
     textBreakNext(iterator);
     return targetIndex == characters.getIndex();
 }

 // Always answers true; the iterator argument is not inspected.
 bool isWordTextBreak(TextBreakIterator*)
 {
     const bool alwaysWordBreak = true;
     return alwaysWordBreak;
 }

 }
	/*
	* Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
	* Copyright (C) 2007 Apple Inc. All rights reserved.
	* Copyright (C) 2008 Jürg Billeter <j@bitron.ch>
	* Copyright (C) 2008 Dominik Röttsches <dominik.roettsches@access-company.com>
	* Copyright (C) 2010 Igalia S.L.
	*
	* This library is free software; you can redistribute it and/or
	* modify it under the terms of the GNU Library General Public
	* License as published by the Free Software Foundation; either
	* version 2 of the License, or (at your option) any later version.
	*
	* This library is distributed in the hope that it will be useful,
	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	* Library General Public License for more details.
	*
	* You should have received a copy of the GNU Library General Public License
	* along with this library; see the file COPYING.LIB. If not, write to
	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
	* Boston, MA 02110-1301, USA.
	*
	*/

	#include "config.h"
	#include "TextBreakIterator.h"

	#include <wtf/Atomics.h>
	#include <wtf/gobject/GOwnPtr.h>
	#include <pango/pango.h>

	using namespace WTF;
	using namespace std;

	#define UTF8_IS_SURROGATE(character) (character >= 0x10000 && character <= 0x10FFFF)

	namespace WebCore {

	// Holds a UTF-8 copy of a UTF-16 string and a cursor that is tracked both as
	// a code point index into the UTF-8 text and as a UTF-16 code unit offset.
	class CharacterIterator {
	public:
	    // Start from a well-defined empty state; setText() installs real values.
	    CharacterIterator()
	        : m_length(0)
	        , m_size(0)
	        , m_index(0)
	        , m_utf16Index(0)
	        , m_utf16Length(0)
	    {
	    }

	    // Converts |string| to UTF-8 and rewinds the cursor. Returns false when
	    // the conversion produced no bytes.
	    bool setText(const UChar* string, int length);
	    const gchar* getText() const { return m_utf8.get(); }
	    // Number of code points in the UTF-8 text.
	    int getLength() const { return m_length; }
	    // Number of bytes in the UTF-8 text.
	    glong getSize() const { return m_size; }
	    void setIndex(int index);
	    int getIndex() const { return m_index; }
	    void setUTF16Index(int index);
	    int getUTF16Index() const { return m_utf16Index; }
	    int getUTF16Length() const { return m_utf16Length; }
	    int first();
	    int last();
	    int next();
	    int previous();
	private:
	    // UTF-16 code units (0, 1 or 2) taken by the code point at |index|.
	    int characterSize(int index);

	    GOwnPtr<char> m_utf8;
	    int m_length; // Code points in m_utf8.
	    long m_size; // Bytes in m_utf8.
	    int m_index; // Cursor, in code points.
	    int m_utf16Index; // Cursor, in UTF-16 code units.
	    int m_utf16Length; // Length of the source string in UTF-16 code units.
	};

	int CharacterIterator::characterSize(int index)
	{
	if (index == m_length \|\| index < 0)
	return 0;
	if (m_length == m_utf16Length)
	return 1;

	gchar* indexPtr = g_utf8_offset_to_pointer(m_utf8.get(), index);
	gunichar character = g_utf8_get_char(indexPtr);
	return UTF8_IS_SURROGATE(character) ? 2 : 1;
	}

	bool CharacterIterator::setText(const UChar* string, int length)
	{
	long utf8Size = 0;
	m_utf8.set(g_utf16_to_utf8(string, length, 0, &utf8Size, 0));
	if (!utf8Size)
	return false;

	m_utf16Length = length;
	m_length = g_utf8_strlen(m_utf8.get(), utf8Size);
	m_size = utf8Size;
	m_index = 0;
	m_utf16Index = 0;

	return true;
	}

	void CharacterIterator::setIndex(int index)
	{
	if (index == m_index)
	return;
	if (index <= 0)
	m_index = m_utf16Index = 0;
	else if (index >= m_length) {
	m_index = m_length;
	m_utf16Index = m_utf16Length;
	} else if (m_length == m_utf16Length)
	m_index = m_utf16Index = index;
	else {
	m_index = index;
	int utf16Index = 0;
	int utf8Index = 0;
	while (utf8Index < index) {
	utf16Index += characterSize(utf8Index);
	utf8Index++;
	}
	m_utf16Index = utf16Index;
	}
	}

	void CharacterIterator::setUTF16Index(int index)
	{
	if (index == m_utf16Index)
	return;
	if (index <= 0)
	m_utf16Index = m_index = 0;
	else if (index >= m_utf16Length) {
	m_utf16Index = m_utf16Length;
	m_index = m_length;
	} else if (m_length == m_utf16Length)
	m_utf16Index = m_index = index;
	else {
	m_utf16Index = index;
	int utf16Index = 0;
	int utf8Index = 0;
	while (utf16Index < index) {
	utf16Index += characterSize(utf8Index);
	utf8Index++;
	}
	m_index = utf8Index;
	}
	}

	// Rewinds both cursors to the start of the text and returns the new code
	// point position.
	int CharacterIterator::first()
	{
	    m_utf16Index = 0;
	    m_index = 0;
	    return m_index;
	}

	// Places both cursors one past the final character and returns the new code
	// point position (m_length).
	int CharacterIterator::last()
	{
	    m_utf16Index = m_utf16Length;
	    m_index = m_length;
	    return m_index;
	}

	// Advances the cursor by one code point. Returns the new code point position,
	// or TextBreakDone (also stored in both cursors) when already at the end.
	int CharacterIterator::next()
	{
	    const int following = m_index + 1;

	    if (following > m_length) {
	        m_index = TextBreakDone;
	        m_utf16Index = TextBreakDone;
	        return m_index;
	    }

	    // Step the UTF-16 offset by the width of the code point being left,
	    // never going beyond the end of the UTF-16 text.
	    const int advanced = m_utf16Index + characterSize(m_index);
	    m_utf16Index = min(advanced, m_utf16Length);
	    m_index = following;
	    return m_index;
	}

	// Moves the cursor back by one code point. Returns the new code point
	// position, or TextBreakDone (also stored in both cursors) when already at
	// the start.
	int CharacterIterator::previous()
	{
	    const int preceding = m_index - 1;

	    if (preceding < 0) {
	        m_index = TextBreakDone;
	        m_utf16Index = TextBreakDone;
	        return m_index;
	    }

	    // Step the UTF-16 offset back by the width of the code point being
	    // entered, never going below zero.
	    const int retreated = m_utf16Index - characterSize(preceding);
	    m_utf16Index = max(retreated, 0);
	    m_index = preceding;
	    return m_index;
	}

	// Kind of boundary an iterator searches for; selects which PangoLogAttr flag
	// textBreakNext()/textBreakPrevious() test.
	enum UBreakIteratorType {
	    UBRK_CHARACTER = 0,
	    UBRK_WORD = 1,
	    UBRK_LINE = 2,
	    UBRK_SENTENCE = 3
	};

	class TextBreakIterator {
	public:
	UBreakIteratorType m_type;
	PangoLogAttr* m_logAttrs;
	CharacterIterator m_charIterator;
	};

	// Points |iterator| at |string| and recomputes its Pango attributes.
	// When |createdIterator| is false a new TextBreakIterator is allocated into
	// |iterator| and the flag is set. Returns the iterator, or 0 when |string| is
	// null, |iterator| is null, or the text could not be set.
	static TextBreakIterator* setUpIterator(bool& createdIterator, TextBreakIterator*& iterator,
	    UBreakIteratorType type, const UChar* string, int length)
	{
	    if (!string)
	        return 0;

	    if (!createdIterator) {
	        iterator = new TextBreakIterator();
	        createdIterator = true;
	    }
	    if (!iterator)
	        return 0;

	    CharacterIterator& characters = iterator->m_charIterator;
	    if (!characters.setText(string, length))
	        return 0;

	    iterator->m_type = type;

	    // One attribute per code point plus one for the position after the text.
	    const int attributeCount = characters.getLength() + 1;

	    // createdIterator is always true here, so the previous attribute array
	    // (null for a brand-new iterator) is released unconditionally.
	    g_free(iterator->m_logAttrs);
	    iterator->m_logAttrs = g_new0(PangoLogAttr, attributeCount);
	    pango_get_log_attrs(characters.getText(), characters.getSize(),
	                        -1, 0, iterator->m_logAttrs, attributeCount);

	    return iterator;
	}

	static TextBreakIterator* nonSharedCharacterBreakIterator;

	// Takes the cached character iterator when one is available (otherwise
	// setUpIterator() allocates a new one) and points it at |buffer|.
	// m_iterator ends up 0 when set-up fails.
	NonSharedCharacterBreakIterator::NonSharedCharacterBreakIterator(const UChar* buffer, int length)
	{
	    TextBreakIterator* iterator = nonSharedCharacterBreakIterator;
	    bool ownsIterator = iterator && weakCompareAndSwap(reinterpret_cast<void**>(&nonSharedCharacterBreakIterator), iterator, 0);
	    // If the swap did not succeed the pointer is not ours; forget it so it
	    // can never be released from here.
	    if (!ownsIterator)
	        iterator = 0;

	    m_iterator = setUpIterator(ownsIterator, iterator, UBRK_CHARACTER, buffer, length);

	    // On failure setUpIterator() returns 0 while |iterator| may still hold an
	    // object we own (taken from the cache or freshly allocated). Previously
	    // that object was overwritten and leaked; hand it back to the cache, or
	    // delete it if the cache slot is occupied.
	    if (!m_iterator && iterator
	        && !weakCompareAndSwap(reinterpret_cast<void**>(&nonSharedCharacterBreakIterator), 0, iterator))
	        delete iterator;
	}

	// Offers the iterator back to the file-level cache slot; deletes it when the
	// compare-and-swap into the slot does not succeed.
	NonSharedCharacterBreakIterator::~NonSharedCharacterBreakIterator()
	{
	    void** cacheSlot = reinterpret_cast<void**>(&nonSharedCharacterBreakIterator);
	    const bool returnedToCache = weakCompareAndSwap(cacheSlot, 0, m_iterator);
	    if (returnedToCache)
	        return;
	    delete m_iterator;
	}

	// Returns a function-static character-type iterator re-targeted at |string|,
	// or 0 when set-up fails.
	TextBreakIterator* cursorMovementIterator(const UChar* string, int length)
	{
	    // FIXME: This needs closer inspection to achieve behaviour identical to the ICU version.
	    static bool iteratorExists = false;
	    static TextBreakIterator* sharedIterator;
	    TextBreakIterator* result = setUpIterator(iteratorExists, sharedIterator, UBRK_CHARACTER, string, length);
	    return result;
	}

	// Returns a function-static word-type iterator re-targeted at |string|, or 0
	// when set-up fails.
	TextBreakIterator* wordBreakIterator(const UChar* string, int length)
	{
	    static bool iteratorExists = false;
	    static TextBreakIterator* sharedIterator;
	    TextBreakIterator* result = setUpIterator(iteratorExists, sharedIterator, UBRK_WORD, string, length);
	    return result;
	}

	static bool createdLineBreakIterator = false;
	static TextBreakIterator* staticLineBreakIterator;

	// Hands out a line-type iterator for |string|. The file-level cached iterator
	// is used (and removed from the cache) when it is available or has never been
	// created; otherwise a separate iterator is allocated. The caller gives it
	// back through releaseLineBreakIterator().
	TextBreakIterator* acquireLineBreakIterator(const UChar* string, int length, const AtomicString&)
	{
	    TextBreakIterator* lineBreakIterator = 0;
	    // The original condition carried an escaped "\|\|", which is not valid
	    // C++; the intended operator is a plain logical OR.
	    if (!createdLineBreakIterator || staticLineBreakIterator) {
	        setUpIterator(createdLineBreakIterator, staticLineBreakIterator, UBRK_LINE, string, length);
	        // Move the cached iterator out of the cache slot.
	        swap(staticLineBreakIterator, lineBreakIterator);
	    }

	    // Cache unavailable (or empty after set-up): build a standalone iterator.
	    if (!lineBreakIterator) {
	        bool createdNewLineBreakIterator = false;
	        setUpIterator(createdNewLineBreakIterator, lineBreakIterator, UBRK_LINE, string, length);
	    }

	    return lineBreakIterator;
	}

	// Takes back an iterator obtained from acquireLineBreakIterator(): it refills
	// the cache slot when that is empty and is deleted otherwise.
	void releaseLineBreakIterator(TextBreakIterator* iterator)
	{
	    ASSERT(createdLineBreakIterator);
	    ASSERT(iterator);

	    if (staticLineBreakIterator) {
	        delete iterator;
	        return;
	    }
	    staticLineBreakIterator = iterator;
	}

	// Returns a function-static sentence-type iterator re-targeted at |string|,
	// or 0 when set-up fails.
	TextBreakIterator* sentenceBreakIterator(const UChar* string, int length)
	{
	    static bool iteratorExists = false;
	    static TextBreakIterator* sharedIterator;
	    TextBreakIterator* result = setUpIterator(iteratorExists, sharedIterator, UBRK_SENTENCE, string, length);
	    return result;
	}

	// Rewinds the iterator to the start of its text and returns the resulting
	// UTF-16 offset.
	int textBreakFirst(TextBreakIterator* iterator)
	{
	    CharacterIterator& characters = iterator->m_charIterator;
	    characters.first();
	    return characters.getUTF16Index();
	}

	// Positions the iterator near the end of its text and returns the UTF-16
	// offset of that position.
	int textBreakLast(TextBreakIterator* iterator)
	{
	    // textBreakLast is not meant to find just any break according to
	    // iterator->m_type but really the one near the last character
	    // (compare the ICU documentation for ubrk_first and ubrk_last).
	    // From the ICU docs for ubrk_last:
	    // "Determine the index immediately beyond the last character in the text being scanned."

	    // Walk backwards over the cursor positions in iterator->m_logAttrs. While
	    // those positions are flagged as white space, remember the leftmost one;
	    // stop at the first cursor position that is not white. If no white
	    // position was seen, the result stays at the text length, i.e. the
	    // position just beyond the last character.

	    CharacterIterator& characters = iterator->m_charIterator;
	    const PangoLogAttr* attributes = iterator->m_logAttrs;

	    int nextWhiteSpacePos = characters.getLength();
	    bool whiteSpaceAtTheEnd = true;

	    // previous() yields TextBreakDone once the start has been passed; the
	    // pos >= 0 test presumably relies on that sentinel being negative.
	    for (int pos = characters.last(); pos >= 0 && whiteSpaceAtTheEnd; pos = characters.previous()) {
	        if (!attributes[pos].is_cursor_position)
	            continue;
	        whiteSpaceAtTheEnd = attributes[pos].is_white;
	        if (whiteSpaceAtTheEnd)
	            nextWhiteSpacePos = pos;
	    }

	    characters.setIndex(nextWhiteSpacePos);
	    return characters.getUTF16Index();
	}

	// Advances to the next position that is a boundary for the iterator's type
	// and returns its UTF-16 offset; returns the character iterator's
	// end-of-iteration UTF-16 value when no further boundary exists.
	// The original condition used escaped "\|\|" sequences, which are not valid
	// C++; the per-type switch below expresses the same test.
	int textBreakNext(TextBreakIterator* iterator)
	{
	    CharacterIterator& characters = iterator->m_charIterator;

	    while (characters.next() != TextBreakDone) {
	        const PangoLogAttr& attributes = iterator->m_logAttrs[characters.getIndex()];

	        // FIXME: UBRK_WORD case: Single multibyte characters (i.e. white space around them), such as the euro symbol €,
	        // are not marked as word_start & word_end as opposed to the way ICU does it.
	        // This leads to - for example - different word selection behaviour when right clicking.

	        bool atBoundary = false;
	        switch (iterator->m_type) {
	        case UBRK_LINE:
	            atBoundary = attributes.is_line_break;
	            break;
	        case UBRK_WORD:
	            atBoundary = attributes.is_word_start || attributes.is_word_end;
	            break;
	        case UBRK_CHARACTER:
	            atBoundary = attributes.is_cursor_position;
	            break;
	        case UBRK_SENTENCE:
	            atBoundary = attributes.is_sentence_boundary;
	            break;
	        default:
	            break;
	        }

	        if (atBoundary)
	            break;
	    }
	    return characters.getUTF16Index();
	}

	// Steps back to the closest earlier position that is a boundary for the
	// iterator's type and returns its UTF-16 offset; returns the character
	// iterator's end-of-iteration UTF-16 value when no earlier boundary exists.
	// The original condition used escaped "\|\|" sequences, which are not valid
	// C++; the per-type switch below expresses the same test.
	int textBreakPrevious(TextBreakIterator* iterator)
	{
	    CharacterIterator& characters = iterator->m_charIterator;

	    while (characters.previous() != TextBreakDone) {
	        const PangoLogAttr& attributes = iterator->m_logAttrs[characters.getIndex()];

	        bool atBoundary = false;
	        switch (iterator->m_type) {
	        case UBRK_LINE:
	            atBoundary = attributes.is_line_break;
	            break;
	        case UBRK_WORD:
	            atBoundary = attributes.is_word_start || attributes.is_word_end;
	            break;
	        case UBRK_CHARACTER:
	            atBoundary = attributes.is_cursor_position;
	            break;
	        case UBRK_SENTENCE:
	            atBoundary = attributes.is_sentence_boundary;
	            break;
	        default:
	            break;
	        }

	        if (atBoundary)
	            break;
	    }
	    return characters.getUTF16Index();
	}

	// Returns the boundary before UTF-16 offset |offset|. Offsets beyond the text
	// yield TextBreakDone and negative offsets yield 0, in both cases without
	// moving the iterator.
	int textBreakPreceding(TextBreakIterator* iterator, int offset)
	{
	    CharacterIterator& characters = iterator->m_charIterator;

	    if (offset > characters.getUTF16Length())
	        return TextBreakDone;
	    if (offset < 0)
	        return 0;

	    characters.setUTF16Index(offset);
	    return textBreakPrevious(iterator);
	}

	// Returns the boundary after UTF-16 offset |offset|. Offsets beyond the text
	// yield TextBreakDone and negative offsets yield 0, in both cases without
	// moving the iterator.
	int textBreakFollowing(TextBreakIterator* iterator, int offset)
	{
	    CharacterIterator& characters = iterator->m_charIterator;

	    if (offset > characters.getUTF16Length())
	        return TextBreakDone;
	    if (offset < 0)
	        return 0;

	    characters.setUTF16Index(offset);
	    return textBreakNext(iterator);
	}

	// Returns the iterator's current position as a UTF-16 offset.
	int textBreakCurrent(TextBreakIterator* iterator)
	{
	    const int currentOffset = iterator->m_charIterator.getUTF16Index();
	    return currentOffset;
	}

	// Reports whether UTF-16 offset |offset| is a boundary for the iterator's
	// type. Offset 0 always is; offsets beyond the text never are. Moves the
	// iterator as a side effect.
	bool isTextBreak(TextBreakIterator* iterator, int offset)
	{
	    if (offset == 0)
	        return true;

	    CharacterIterator& characters = iterator->m_charIterator;
	    if (offset > characters.getUTF16Length())
	        return false;

	    characters.setUTF16Index(offset);
	    const int targetIndex = characters.getIndex();

	    // Step one character back and search forward: the search stops on the
	    // target position exactly when that position is a boundary.
	    characters.previous();
	    textBreakNext(iterator);
	    return targetIndex == characters.getIndex();
	}

	// Always answers true; the iterator argument is not inspected.
	bool isWordTextBreak(TextBreakIterator*)
	{
	    const bool alwaysWordBreak = true;
	    return alwaysWordBreak;
	}

	}