ojluni/src/main/java/sun/text/normalizer/NormalizerBase.java - platform/libcore - Git at Google

 /*
  * Copyright (c) 2005, 2009, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 only, as
  * published by the Free Software Foundation.  Oracle designates this
  * particular file as subject to the "Classpath" exception as provided
  * by Oracle in the LICENSE file that accompanied this code.
  *
  * This code is distributed in the hope that it will be useful, but WITHOUT
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  * version 2 for more details (a copy is included in the LICENSE file that
  * accompanied this code).
  *
  * You should have received a copy of the GNU General Public License version
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  * or visit www.oracle.com if you need additional information or have any
  * questions.
  */
 /*
  *******************************************************************************
  * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
  *                                                                             *
  * The original version of this source code and documentation is copyrighted   *
  * and owned by IBM, These materials are provided under terms of a License     *
  * Agreement between IBM and Sun. This technology is protected by multiple     *
  * US and International patents. This notice and attribution to IBM may not    *
  * to removed.                                                                 *
  *******************************************************************************
  */

 package sun.text.normalizer;

 import java.text.CharacterIterator;
 import java.text.Normalizer;

 /**
  * Unicode Normalization
  *
  * <h2>Unicode normalization API</h2>
  *
  * <code>normalize</code> transforms Unicode text into an equivalent composed or
  * decomposed form, allowing for easier sorting and searching of text.
  * <code>normalize</code> supports the standard normalization forms described in
  * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
  * Unicode Standard Annex #15 &mdash; Unicode Normalization Forms</a>.
  *
  * Characters with accents or other adornments can be encoded in
  * several different ways in Unicode.  For example, take the character A-acute.
  * In Unicode, this can be encoded as a single character (the
  * "composed" form):
  *
  * <p>
  *      00C1    LATIN CAPITAL LETTER A WITH ACUTE
  * </p>
  *
  * or as two separate characters (the "decomposed" form):
  *
  * <p>
  *      0041    LATIN CAPITAL LETTER A
  *      0301    COMBINING ACUTE ACCENT
  * </p>
  *
  * To a user of your program, however, both of these sequences should be
  * treated as the same "user-level" character "A with acute accent".  When you
  * are searching or comparing text, you must ensure that these two sequences are
  * treated equivalently.  In addition, you must handle characters with more than
  * one accent.  Sometimes the order of a character's combining accents is
  * significant, while in other cases accent sequences in different orders are
  * really equivalent.
  *
  * Similarly, the string "ffi" can be encoded as three separate letters:
  *
  * <p>
  *      0066    LATIN SMALL LETTER F
  *      0066    LATIN SMALL LETTER F
  *      0069    LATIN SMALL LETTER I
  * </p>
  *
  * or as the single character
  *
  * <p>
  *      FB03    LATIN SMALL LIGATURE FFI
  * </p>
  *
  * The ffi ligature is not a distinct semantic character, and strictly speaking
  * it shouldn't be in Unicode at all, but it was included for compatibility
  * with existing character sets that already provided it.  The Unicode standard
  * identifies such characters by giving them "compatibility" decompositions
  * into the corresponding semantic characters.  When sorting and searching, you
  * will often want to use these mappings.
  *
  * <code>normalize</code> helps solve these problems by transforming text into
  * the canonical composed and decomposed forms as shown in the first example
  * above. In addition, you can have it perform compatibility decompositions so
  * that you can treat compatibility characters the same as their equivalents.
  * Finally, <code>normalize</code> rearranges accents into the proper canonical
  * order, so that you do not have to worry about accent rearrangement on your
  * own.
  *
  * Form FCD, "Fast C or D", is also designed for collation.
  * It makes it possible to work on strings that are not necessarily normalized
  * with an algorithm (like in collation) that works under "canonical closure",
  * i.e., it treats precomposed characters and their decomposed equivalents the
  * same.
  *
  * It is not a normalization form because it does not provide for uniqueness of
  * representation. Multiple strings may be canonically equivalent (their NFDs
  * are identical) and may all conform to FCD without being identical themselves.
  *
  * The form is defined such that the "raw decomposition", the recursive
  * canonical decomposition of each character, results in a string that is
  * canonically ordered. This means that precomposed characters are allowed for
  * as long as their decompositions do not need canonical reordering.
  *
  * Its advantage for a process like collation is that all NFD and most NFC texts
  * - and many unnormalized texts - already conform to FCD and do not need to be
  * normalized (NFD) for such a process. The FCD quick check will return YES for
  * most strings in practice.
  *
  * normalize(FCD) may be implemented with NFD.
  *
  * For more details on FCD see the collation design document:
  * http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm
  *
  * ICU collation performs either NFD or FCD normalization automatically if
  * normalization is turned on for the collator object. Beyond collation and
  * string search, normalized strings may be useful for string equivalence
  * comparisons, transliteration/transcription, unique representations, etc.
  *
  * The W3C generally recommends exchanging text in NFC.
  * Note also that most legacy character encodings use only precomposed forms and
  * often do not encode any combining marks by themselves. For conversion to such
  * character encodings the Unicode text needs to be normalized to NFC.
  * For more usage examples, see the Unicode Standard Annex.
  * @stable ICU 2.8
  */

 public final class NormalizerBase implements Cloneable {

     //-------------------------------------------------------------------------
     // Private data
     //-------------------------------------------------------------------------
     // Working storage for normalized text. clone() gives each copy its own array.
     private char[] buffer = new char[100];
     // NOTE(review): not referenced in the visible part of this file; presumably
     // the first valid offset in buffer — confirm against the rest of the class.
     private int bufferStart = 0;
     // Iteration cursor: current()/next()/previous() hand it to getCodePointAt(),
     // and next()/previous() move it by one or two chars per code point.
     private int bufferPos   = 0;
     // While bufferPos < bufferLimit, current()/next() answer without calling
     // nextNormalize().
     private int bufferLimit = 0;

     // The input text and our position in it
     private UCharacterIterator  text;
     // Normalization form; defaults to NFC, the visible constructors overwrite it.
     private Mode                mode = NFC;
     // Option bits supplied to the constructor (0 = none; see UNICODE_3_2).
     private int                 options = 0;
     // Both indexes are set back to 0 by reset().
     // NOTE(review): presumably offsets into the input text — confirm.
     private int                 currentIndex;
     private int                 nextIndex;

     /**
      * Options bit set value to select Unicode 3.2 normalization
      * (except NormalizationCorrections).
      * At most one Unicode version can be selected at a time.
      * @stable ICU 2.6
      */
     public static final int UNICODE_3_2=0x20;

     /**
      * Constant indicating that the end of the iteration has been reached.
      * This is guaranteed to have the same value as {@link UCharacterIterator#DONE}.
      * It is the value {@link #current()}, {@link #next()} and
      * {@link #previous()} return when no further code point is available.
      * @stable ICU 2.8
      */
     public static final int DONE = UCharacterIterator.DONE;

     /**
      * Constants for normalization modes.
      * @stable ICU 2.8
      */
     public static class Mode {
         /** Numeric identifier handed to the constructor. */
         private int modeValue;

         private Mode(int value) {
             this.modeValue = value;
         }

         /**
          * Dispatch hook for array-based normalization. This base version
          * performs no normalization: it copies the source range to the
          * destination when it fits and always reports the source length.
          * @stable ICU 2.6
          */
         protected int normalize(char[] src, int srcStart, int srcLimit,
                                 char[] dest, int destStart, int destLimit,
                                 UnicodeSet nx) {
             final int needed = srcLimit - srcStart;
             final int capacity = destLimit - destStart;
             // Copy only when the whole range fits; the required length is
             // returned either way so the caller can detect the shortfall.
             if (needed <= capacity) {
                 System.arraycopy(src, srcStart, dest, destStart, needed);
             }
             return needed;
         }

         /**
          * Dispatch hook taking option bits; resolves them to a set through
          * {@code NormalizerImpl.getNX} and forwards to the overload above.
          * @stable ICU 2.6
          */
         protected int normalize(char[] src, int srcStart, int srcLimit,
                                 char[] dest, int destStart, int destLimit,
                                 int options) {
             final UnicodeSet nx = NormalizerImpl.getNX(options);
             return normalize(src, srcStart, srcLimit,
                              dest, destStart, destLimit,
                              nx);
         }

         /**
          * Dispatch hook for string normalization; the base version returns
          * the input unchanged.
          * @stable ICU 2.6
          */
         protected String normalize(String src, int options) {
             return src;
         }

         /**
          * Dispatch hook; the base version returns -1.
          * @stable ICU 2.8
          */
         protected int getMinC() {
             return -1;
         }

         /**
          * Dispatch hook; the base version returns -1.
          * @stable ICU 2.8
          */
         protected int getMask() {
             return -1;
         }

         /**
          * Dispatch hook; the base version supplies no boundary test (null).
          * @stable ICU 2.8
          */
         protected IsPrevBoundary getPrevBoundary() {
             return null;
         }

         /**
          * Dispatch hook; the base version supplies no boundary test (null).
          * @stable ICU 2.8
          */
         protected IsNextBoundary getNextBoundary() {
             return null;
         }

         /**
          * Dispatch hook for quick checks. The base version never inspects the
          * text: it answers MAYBE when that is permitted and NO otherwise.
          * @stable ICU 2.6
          */
         protected QuickCheckResult quickCheck(char[] src, int start, int limit,
                                               boolean allowMaybe, UnicodeSet nx) {
             return allowMaybe ? MAYBE : NO;
         }

         /**
          * Dispatch hook; the base version reports every code point as
          * skippable.
          * @stable ICU 2.8
          */
         protected boolean isNFSkippable(int c) {
             return true;
         }
     }

     /**
      * No decomposition/composition.
      * @stable ICU 2.8
      */
     public static final Mode NONE = new Mode(1);

     /**
      * Canonical decomposition.
      * @stable ICU 2.8
      */
     public static final Mode NFD = new NFDMode(2);

     /** Mode behind {@link #NFD}: delegates to the non-compat decompose paths. */
     private static final class NFDMode extends Mode {
         private NFDMode(int value) {
             super(value);
         }

         @Override
         protected int normalize(char[] src, int srcStart, int srcLimit,
                                 char[] dest, int destStart, int destLimit,
                                 UnicodeSet nx) {
             // decompose() requires a one-element int array argument; whatever
             // it stores there is not used by this method.
             final int[] trailingCC = new int[1];
             return NormalizerImpl.decompose(src, srcStart, srcLimit,
                                             dest, destStart, destLimit,
                                             false, trailingCC, nx);
         }

         @Override
         protected String normalize(String src, int options) {
             // compat == false selects the canonical decomposition.
             return decompose(src, false, options);
         }

         @Override
         protected int getMinC() {
             return NormalizerImpl.MIN_WITH_LEAD_CC;
         }

         @Override
         protected IsPrevBoundary getPrevBoundary() {
             return new IsPrevNFDSafe();
         }

         @Override
         protected IsNextBoundary getNextBoundary() {
             return new IsNextNFDSafe();
         }

         @Override
         protected int getMask() {
             return NormalizerImpl.CC_MASK | NormalizerImpl.QC_NFD;
         }

         @Override
         protected QuickCheckResult quickCheck(char[] src, int start,
                                               int limit, boolean allowMaybe,
                                               UnicodeSet nx) {
             final int minNoMaybe = NormalizerImpl.getFromIndexesArr(
                     NormalizerImpl.INDEX_MIN_NFD_NO_MAYBE);
             return NormalizerImpl.quickCheck(src, start, limit,
                                              minNoMaybe,
                                              NormalizerImpl.QC_NFD,
                                              0,
                                              allowMaybe,
                                              nx);
         }

         @Override
         protected boolean isNFSkippable(int c) {
             // Same mask value that getMask() returns for this mode.
             return NormalizerImpl.isNFSkippable(c, this, getMask());
         }
     }

     /**
      * Compatibility decomposition.
      * @stable ICU 2.8
      */
     public static final Mode NFKD = new NFKDMode(3);

     /** Mode behind {@link #NFKD}: delegates to the compat decompose paths. */
     private static final class NFKDMode extends Mode {
         private NFKDMode(int value) {
             super(value);
         }

         @Override
         protected int normalize(char[] src, int srcStart, int srcLimit,
                                 char[] dest, int destStart, int destLimit,
                                 UnicodeSet nx) {
             // decompose() requires a one-element int array argument; whatever
             // it stores there is not used by this method.
             final int[] trailingCC = new int[1];
             return NormalizerImpl.decompose(src, srcStart, srcLimit,
                                             dest, destStart, destLimit,
                                             true, trailingCC, nx);
         }

         @Override
         protected String normalize(String src, int options) {
             // compat == true selects the compatibility decomposition.
             return decompose(src, true, options);
         }

         @Override
         protected int getMinC() {
             return NormalizerImpl.MIN_WITH_LEAD_CC;
         }

         @Override
         protected IsPrevBoundary getPrevBoundary() {
             return new IsPrevNFDSafe();
         }

         @Override
         protected IsNextBoundary getNextBoundary() {
             return new IsNextNFDSafe();
         }

         @Override
         protected int getMask() {
             return NormalizerImpl.CC_MASK | NormalizerImpl.QC_NFKD;
         }

         @Override
         protected QuickCheckResult quickCheck(char[] src, int start,
                                               int limit, boolean allowMaybe,
                                               UnicodeSet nx) {
             final int minNoMaybe = NormalizerImpl.getFromIndexesArr(
                     NormalizerImpl.INDEX_MIN_NFKD_NO_MAYBE);
             return NormalizerImpl.quickCheck(src, start, limit,
                                              minNoMaybe,
                                              NormalizerImpl.QC_NFKD,
                                              NormalizerImpl.OPTIONS_COMPAT,
                                              allowMaybe,
                                              nx);
         }

         @Override
         protected boolean isNFSkippable(int c) {
             // Same mask value that getMask() returns for this mode.
             return NormalizerImpl.isNFSkippable(c, this, getMask());
         }
     }

     /**
      * Canonical decomposition followed by canonical composition.
      * @stable ICU 2.8
      */
     public static final Mode NFC = new NFCMode(4);

     /** Mode behind {@link #NFC}: delegates to the non-compat compose paths. */
     private static final class NFCMode extends Mode {
         private NFCMode(int value) {
             super(value);
         }

         @Override
         protected int normalize(char[] src, int srcStart, int srcLimit,
                                 char[] dest, int destStart, int destLimit,
                                 UnicodeSet nx) {
             // Option word 0: no compat flag.
             return NormalizerImpl.compose(src, srcStart, srcLimit,
                                           dest, destStart, destLimit,
                                           0, nx);
         }

         @Override
         protected String normalize(String src, int options) {
             // compat == false selects canonical composition.
             return compose(src, false, options);
         }

         @Override
         protected int getMinC() {
             return NormalizerImpl.getFromIndexesArr(
                     NormalizerImpl.INDEX_MIN_NFC_NO_MAYBE);
         }

         @Override
         protected IsPrevBoundary getPrevBoundary() {
             return new IsPrevTrueStarter();
         }

         @Override
         protected IsNextBoundary getNextBoundary() {
             return new IsNextTrueStarter();
         }

         @Override
         protected int getMask() {
             return NormalizerImpl.CC_MASK | NormalizerImpl.QC_NFC;
         }

         @Override
         protected QuickCheckResult quickCheck(char[] src, int start,
                                               int limit, boolean allowMaybe,
                                               UnicodeSet nx) {
             final int minNoMaybe = NormalizerImpl.getFromIndexesArr(
                     NormalizerImpl.INDEX_MIN_NFC_NO_MAYBE);
             return NormalizerImpl.quickCheck(src, start, limit,
                                              minNoMaybe,
                                              NormalizerImpl.QC_NFC,
                                              0,
                                              allowMaybe,
                                              nx);
         }

         @Override
         protected boolean isNFSkippable(int c) {
             // Note: this mask is wider than getMask() — it also carries
             // COMBINES_ANY and only the QC_ANY_NO bits of QC_NFC.
             return NormalizerImpl.isNFSkippable(
                     c, this,
                     (NormalizerImpl.CC_MASK | NormalizerImpl.COMBINES_ANY
                      | (NormalizerImpl.QC_NFC & NormalizerImpl.QC_ANY_NO)));
         }
     }

     /**
      * Compatibility decomposition followed by canonical composition.
      * @stable ICU 2.8
      */
     public static final Mode NFKC = new NFKCMode(5);

     /** Mode behind {@link #NFKC}: delegates to the compat compose paths. */
     private static final class NFKCMode extends Mode {
         private NFKCMode(int value) {
             super(value);
         }

         @Override
         protected int normalize(char[] src, int srcStart, int srcLimit,
                                 char[] dest, int destStart, int destLimit,
                                 UnicodeSet nx) {
             // OPTIONS_COMPAT is passed as the option word for this mode.
             return NormalizerImpl.compose(src, srcStart, srcLimit,
                                           dest, destStart, destLimit,
                                           NormalizerImpl.OPTIONS_COMPAT, nx);
         }

         @Override
         protected String normalize(String src, int options) {
             // compat == true selects compatibility composition.
             return compose(src, true, options);
         }

         @Override
         protected int getMinC() {
             return NormalizerImpl.getFromIndexesArr(
                     NormalizerImpl.INDEX_MIN_NFKC_NO_MAYBE);
         }

         @Override
         protected IsPrevBoundary getPrevBoundary() {
             return new IsPrevTrueStarter();
         }

         @Override
         protected IsNextBoundary getNextBoundary() {
             return new IsNextTrueStarter();
         }

         @Override
         protected int getMask() {
             return NormalizerImpl.CC_MASK | NormalizerImpl.QC_NFKC;
         }

         @Override
         protected QuickCheckResult quickCheck(char[] src, int start,
                                               int limit, boolean allowMaybe,
                                               UnicodeSet nx) {
             final int minNoMaybe = NormalizerImpl.getFromIndexesArr(
                     NormalizerImpl.INDEX_MIN_NFKC_NO_MAYBE);
             return NormalizerImpl.quickCheck(src, start, limit,
                                              minNoMaybe,
                                              NormalizerImpl.QC_NFKC,
                                              NormalizerImpl.OPTIONS_COMPAT,
                                              allowMaybe,
                                              nx);
         }

         @Override
         protected boolean isNFSkippable(int c) {
             // Note: this mask is wider than getMask() — it also carries
             // COMBINES_ANY and only the QC_ANY_NO bits of QC_NFKC.
             return NormalizerImpl.isNFSkippable(
                     c, this,
                     (NormalizerImpl.CC_MASK | NormalizerImpl.COMBINES_ANY
                      | (NormalizerImpl.QC_NFKC & NormalizerImpl.QC_ANY_NO)));
         }
     }

     /**
      * Result values for quickCheck().
      * For details see Unicode Technical Report 15.
      * @stable ICU 2.8
      */
     public static final class QuickCheckResult {
         /** Numeric code of this result (0, 1 or 2 for the constants below). */
         private int resultValue;

         // Private: the only instances are the NO / YES / MAYBE constants.
         private QuickCheckResult(int value) {
             this.resultValue = value;
         }
     }

     /**
      * Quick-check answer: the string is not in the normalized format.
      * @stable ICU 2.8
      */
     public static final QuickCheckResult NO = new QuickCheckResult(0);

     /**
      * Quick-check answer: the string is in the normalized format.
      * @stable ICU 2.8
      */
     public static final QuickCheckResult YES = new QuickCheckResult(1);

     /**
      * Quick-check answer: it cannot be determined whether the string is in
      * the normalized format without further thorough checks.
      * @stable ICU 2.8
      */
     public static final QuickCheckResult MAYBE = new QuickCheckResult(2);

     //-------------------------------------------------------------------------
     // Constructors
     //-------------------------------------------------------------------------

     /**
      * Creates a new <tt>Normalizer</tt> object for iterating over the
      * normalized form of a given string.
      * <p>
      * The <tt>options</tt> parameter specifies which optional
      * <tt>Normalizer</tt> features are to be enabled for this object.
      * <p>
      * @param str  The string to be normalized.  The normalization
      *              will start at the beginning of the string.
      *
      * @param mode The normalization mode.
      *
      * @param opt Any optional features to be enabled.
      *            Currently the only available option is {@link #UNICODE_3_2}.
      *            If you want the default behavior corresponding to one of the
      *            standard Unicode Normalization Forms, use 0 for this argument.
      * @stable ICU 2.6
      */
     public NormalizerBase(String str, Mode mode, int opt) {
         // Wrap the string in an iterator; mode and option bits are stored as given.
         text = UCharacterIterator.getInstance(str);
         this.mode = mode;
         options = opt;
     }

     /**
      * Creates a new <tt>Normalizer</tt> object for iterating over the
      * normalized form of the given text.
      * <p>
      * @param iter  The input text to be normalized.  The normalization
      *              will start at the beginning of the string.
      *
      * @param mode  The normalization mode.
      */
     public NormalizerBase(CharacterIterator iter, Mode mode) {
         // No explicit options given: delegate with UNICODE_LATEST.
         this(iter, mode, UNICODE_LATEST);
     }

     /**
      * Creates a new <tt>Normalizer</tt> object for iterating over the
      * normalized form of the given text.
      * <p>
      * @param iter  The input text to be normalized.  The normalization
      *              will start at the beginning of the string.
      *
      * @param mode  The normalization mode.
      *
      * @param opt Any optional features to be enabled.
      *            Currently the only available option is {@link #UNICODE_3_2}.
      *            If you want the default behavior corresponding to one of the
      *            standard Unicode Normalization Forms, use 0 for this argument.
      * @stable ICU 2.6
      */
     public NormalizerBase(CharacterIterator iter, Mode mode, int opt) {
         this.text = UCharacterIterator.getInstance(
                                                    (CharacterIterator)iter.clone()
                                                    );
         this.mode = mode;
         this.options = opt;
     }

     /**
      * Clones this <tt>Normalizer</tt> object.  All properties of this
      * object are duplicated in the new object, including the cloning of any
      * {@link CharacterIterator} that was passed in to the constructor
      * or to {@link #setText(CharacterIterator) setText}.
      * However, the text storage underlying
      * the <tt>CharacterIterator</tt> is not duplicated unless the
      * iterator's <tt>clone</tt> method does so.
      * @stable ICU 2.8
      */
     @Override
     public Object clone() {
         try {
             NormalizerBase copy = (NormalizerBase) super.clone();
             // super.clone() is shallow: give the copy its own iterator ...
             copy.text = (UCharacterIterator) text.clone();
             // ... and its own internal buffer, so the two objects do not
             // share mutable state.
             if (buffer != null) {
                 copy.buffer = buffer.clone();
             }
             return copy;
         }
         catch (CloneNotSupportedException e) {
             // Keep the message callers already see, but attach the original
             // exception as the cause so its stack trace is not lost.
             InternalError error = new InternalError(e.toString());
             error.initCause(e);
             throw error;
         }
     }

     //--------------------------------------------------------------------------
     // Static Utility methods
     //--------------------------------------------------------------------------

     /**
      * Compose a string.
      * The string will be composed according to the specified mode.
      * @param str        The string to compose.
      * @param compat     If true the string will be composed according to
      *                    NFKC rules and if false will be composed according to
      *                    NFC rules.
      * @param options    The only recognized option is UNICODE_3_2
      * @return String    The composed string
      * @stable ICU 2.6
      */
     public static String compose(String str, boolean compat, int options) {

         // When exactly UNICODE_3_2_0_ORIGINAL is requested the text is first
         // run through NormalizerImpl.convert(); otherwise it is used as is.
         final String input = (options == UNICODE_3_2_0_ORIGINAL)
                 ? NormalizerImpl.convert(str)
                 : str;

         // Initial guess for the output size; enlarged below if too small.
         char[] out = new char[input.length() * MAX_BUF_SIZE_COMPOSE];
         final char[] in = input.toCharArray();

         // Resolved from the caller's option word before it is masked.
         final UnicodeSet exclusions = NormalizerImpl.getNX(options);

         /* clear the bits that may only be set here or inside compose() */
         int flags = options & ~(NormalizerImpl.OPTIONS_SETS_MASK
                                 | NormalizerImpl.OPTIONS_COMPAT
                                 | NormalizerImpl.OPTIONS_COMPOSE_CONTIGUOUS);
         if (compat) {
             flags |= NormalizerImpl.OPTIONS_COMPAT;
         }

         while (true) {
             final int needed = NormalizerImpl.compose(in, 0, in.length,
                                                       out, 0, out.length,
                                                       flags, exclusions);
             if (needed <= out.length) {
                 return new String(out, 0, needed);
             }
             // Too small: retry with exactly the size that was reported.
             out = new char[needed];
         }
     }

     // Multipliers applied to the input length to size the first output buffer
     // in compose() and decompose(). They are only an initial guess: both
     // methods reallocate and retry when the reported size is larger.
     private static final int MAX_BUF_SIZE_COMPOSE = 2;
     private static final int MAX_BUF_SIZE_DECOMPOSE = 3;

     /**
      * Decompose a string.
      * The string will be decomposed according to the specified mode.
      * @param str       The string to decompose.
      * @param compat    If true the string will be decomposed according to NFKD
      *                   rules and if false will be decomposed according to NFD
      *                   rules.
      * @return String   The decomposed string
      * @stable ICU 2.8
      */
     public static String decompose(String str, boolean compat) {
         // Convenience overload: same as the three-argument form with UNICODE_LATEST.
         return decompose(str, compat, UNICODE_LATEST);
     }

     /**
      * Decompose a string.
      * The string will be decomposed according to the specified mode.
      * @param str     The string to decompose.
      * @param compat  If true the string will be decomposed according to NFKD
      *                 rules and if false will be decomposed according to NFD
      *                 rules.
      * @param options The normalization options, ORed together (0 for no options).
      * @return String The decomposed string
      * @stable ICU 2.6
      */
     public static String decompose(String str, boolean compat, int options) {

         // One-element array required by NormalizerImpl.decompose(); whatever
         // it stores there is not used by this method.
         int[] trailCC = new int[1];
         UnicodeSet nx = NormalizerImpl.getNX(options);

         // When exactly UNICODE_3_2_0_ORIGINAL is requested the text is first
         // run through NormalizerImpl.convert(); otherwise it is used as is.
         // Both cases then share one retry loop instead of two copies of it.
         String input = (options == UNICODE_3_2_0_ORIGINAL)
                 ? NormalizerImpl.convert(str)
                 : str;

         // Initial guess for the output size; enlarged below if too small.
         char[] dest = new char[input.length() * MAX_BUF_SIZE_DECOMPOSE];

         // Convert the source once up front (as compose() does) rather than
         // on every retry of the loop.
         char[] src = input.toCharArray();

         for (;;) {
             int destSize = NormalizerImpl.decompose(src, 0, src.length,
                                                     dest, 0, dest.length,
                                                     compat, trailCC, nx);
             if (destSize <= dest.length) {
                 return new String(dest, 0, destSize);
             }
             // Too small: retry with exactly the size that was reported.
             dest = new char[destSize];
         }
     }

     /**
      * Normalize a string.
      * The string will be normalized according to the specified normalization
      * mode and options.
      * @param src       The char array to normalize.
      * @param srcStart  Start index of the source
      * @param srcLimit  Limit index of the source
      * @param dest      The char buffer to fill in
      * @param destStart Start index of the destination buffer
      * @param destLimit End index of the destination buffer
      * @param mode      The normalization mode; one of Normalizer.NONE,
      *                   Normalizer.NFD, Normalizer.NFC, Normalizer.NFKC,
      *                   Normalizer.NFKD, Normalizer.DEFAULT
      * @param options The normalization options, ORed together (0 for no options).
      * @return int      The length of the normalized result. A result longer
      *                   than the destination capacity is never returned; the
      *                   exception below is thrown instead.
      * @exception       IndexOutOfBoundsException if the target capacity is
      *                   less than the required length
      * @stable ICU 2.6
      */
     public static int normalize(char[] src,int srcStart, int srcLimit,
                                 char[] dest,int destStart, int destLimit,
                                 Mode  mode, int options) {
         // The mode reports how many chars the result needs.
         final int needed = mode.normalize(src, srcStart, srcLimit,
                                           dest, destStart, destLimit,
                                           options);
         final int capacity = destLimit - destStart;

         // A result that does not fit is reported as an error carrying the
         // required length as its message.
         if (needed > capacity) {
             throw new IndexOutOfBoundsException(Integer.toString(needed));
         }
         return needed;
     }

     //-------------------------------------------------------------------------
     // Iteration API
     //-------------------------------------------------------------------------

     /**
      * Return the current character in the normalized text without
      * advancing. If the internal buffer is exhausted, the next chunk is
      * normalized first; {@link #DONE} is returned when nothing is left.
      * @return The codepoint as an int
      * @stable ICU 2.8
      */
     public int current() {
         // Refill only when the buffered output has been used up.
         final boolean available = bufferPos<bufferLimit || nextNormalize();
         return available ? getCodePointAt(bufferPos) : DONE;
     }

     /**
      * Return the next character in the normalized text and advance
      * the iteration position past it (one char, or two for a
      * supplementary code point).  If the end of the text has already
      * been reached, {@link #DONE} is returned.
      * @return The codepoint as an int
      * @stable ICU 2.8
      */
     public int next() {
         if(bufferPos>=bufferLimit && !nextNormalize()) {
             return DONE;
         }
         final int codePoint=getCodePointAt(bufferPos);
         // Supplementary code points occupy two chars in the buffer.
         if(codePoint>0xFFFF) {
             bufferPos+=2;
         } else {
             bufferPos+=1;
         }
         return codePoint;
     }


     /**
      * Return the previous character in the normalized text and move
      * the iteration position back over it (one char, or two for a
      * supplementary code point).  If the beginning of the text has
      * already been reached, {@link #DONE} is returned.
      * @return The codepoint as an int
      * @stable ICU 2.8
      */
     public int previous() {
         if(bufferPos<=0 && !previousNormalize()) {
             return DONE;
         }
         final int codePoint=getCodePointAt(bufferPos-1);
         // Supplementary code points occupy two chars in the buffer.
         if(codePoint>0xFFFF) {
             bufferPos-=2;
         } else {
             bufferPos-=1;
         }
         return codePoint;
     }

     /**
      * Reset the index to the beginning of the text.
      * This performs the same steps as <tt>setIndexOnly(0)</tt>.
      * @stable ICU 2.8
      */
     public void reset() {
         text.setIndex(0);
         nextIndex=0;
         currentIndex=0;
         clearBuffer();
     }

     /**
      * Set the iteration position in the input text that is being normalized,
      * without any immediate normalization.
      * After setIndexOnly(), getIndex() will return the same index that is
      * specified here.
      *
      * @param index the desired index in the input text.
      * @stable ICU 2.8
      */
     public void setIndexOnly(int index) {
         // Presumably the iterator rejects an out-of-range index here;
         // confirm against UCharacterIterator.setIndex.
         text.setIndex(index);
         nextIndex=index;
         currentIndex=index;
         clearBuffer();
     }

     /**
      * Set the iteration position in the input text that is being normalized
      * and return the first normalized character at that position.
      * <p>
      * <b>Note:</b> This method sets the position in the <em>input</em> text,
      * while {@link #next} and {@link #previous} iterate through characters
      * in the normalized <em>output</em>.  This means that there is not
      * necessarily a one-to-one correspondence between characters returned
      * by <tt>next</tt> and <tt>previous</tt> and the indices passed to and
      * returned from <tt>setIndex</tt> and {@link #getIndex}.
      * <p>
      * @param index the desired index in the input text.
      *
      * @return   the first normalized character (the codepoint as an int)
      *            that is the result of iterating forward starting at the
      *            given index, as returned by {@link #current}.
      *
      * @throws IllegalArgumentException if the given index is less than
      *          {@link #getBeginIndex} or greater than {@link #getEndIndex}.
      * @deprecated ICU 3.2
      * @obsolete ICU 3.2
      */
      public int setIndex(int index) {
          setIndexOnly(index);
          return current();
      }

     /**
      * Retrieve the index of the start of the input text. This is the begin
      * index of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the
      * <tt>String</tt> over which this <tt>Normalizer</tt> is iterating
      * @deprecated ICU 2.2. Use startIndex() instead.
      * @return The start index; this implementation always returns 0
      * @see #startIndex
      */
     public int getBeginIndex() {
         return 0;
     }

     /**
      * Retrieve the index of the end of the input text.  This is the end index
      * of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt>
      * over which this <tt>Normalizer</tt> is iterating
      * @deprecated ICU 2.2. Use endIndex() instead.
      * @return The end index, i.e. the same value as {@link #endIndex}
      * @see #endIndex
      */
     public int getEndIndex() {
         return endIndex();
     }

     /**
      * Retrieve the current iteration position in the input text that is
      * being normalized.  This method is useful in applications such as
      * searching, where you need to be able to determine the position in
      * the input text that corresponds to a given normalized output character.
      * <p>
      * <b>Note:</b> This method reports a position in the <em>input</em>, while
      * {@link #next} and {@link #previous} iterate through characters in the
      * <em>output</em>.  This means that there is not necessarily a one-to-one
      * correspondence between characters returned by <tt>next</tt> and
      * <tt>previous</tt> and the indices passed to and returned from
      * <tt>setIndex</tt> and {@link #getIndex}.
      * @return The current iteration position
      * @stable ICU 2.8
      */
     public int getIndex() {
         // While buffered output remains, report the input index where the
         // buffered chunk started; otherwise the index where the next chunk begins.
         return (bufferPos<bufferLimit) ? currentIndex : nextIndex;
     }

     /**
      * Retrieve the index of the end of the input text.  This is the end index
      * of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt>
      * over which this <tt>Normalizer</tt> is iterating
      * @return The length of the input text, as reported by the underlying
      *         text iterator
      * @stable ICU 2.8
      */
     public int endIndex() {
         return text.getLength();
     }

     //-------------------------------------------------------------------------
     // Property access methods
     //-------------------------------------------------------------------------
     /**
      * Set the normalization mode for this object.
      * <p>
      * <b>Note:</b>If the normalization mode is changed while iterating
      * over a string, calls to {@link #next} and {@link #previous} may
      * return previously buffered characters in the old normalization mode
      * until the iteration is able to re-sync at the next base character.
      * It is safest to call {@link #setText setText()}, {@link #first},
      * {@link #last}, etc. after calling <tt>setMode</tt>.
      * <p>
      * @param newMode the new mode for this <tt>Normalizer</tt>.
      * The supported modes are:
      * <ul>
      *  <li>{@link #COMPOSE}        - Unicode canonical decomposition
      *                                  followed by canonical composition.
      *  <li>{@link #COMPOSE_COMPAT} - Unicode compatibility decomposition
      *                                  followed by canonical composition.
      *  <li>{@link #DECOMP}         - Unicode canonical decomposition
      *  <li>{@link #DECOMP_COMPAT}  - Unicode compatibility decomposition.
      *  <li>{@link #NO_OP}          - Do nothing but return characters
      *                                  from the underlying input text.
      * </ul>
      *
      * @see #getMode
      * @stable ICU 2.8
      */
     public void setMode(Mode newMode) {
         // Only the field is replaced; buffered output is left untouched.
         mode = newMode;
     }
     /**
      * Return the basic operation performed by this <tt>Normalizer</tt>
      *
      * @return the mode currently stored in this object
      * @see #setMode
      * @stable ICU 2.8
      */
     public Mode getMode() {
         return mode;
     }

     /**
      * Set the input text over which this <tt>Normalizer</tt> will iterate.
      * The iteration position is set to the beginning of the input text.
      * @param newText   The new string to be normalized.
      * @throws InternalError if no iterator could be created for the text
      * @stable ICU 2.8
      */
     public void setText(String newText) {
         final UCharacterIterator replacement = UCharacterIterator.getInstance(newText);
         if (replacement != null) {
             text = replacement;
             reset();
             return;
         }
         throw new InternalError("Could not create a new UCharacterIterator");
     }

     /**
      * Set the input text over which this <tt>Normalizer</tt> will iterate.
      * The iteration position is set to the beginning of the input text.
      * @param newText   The new text to be normalized.
      * @throws InternalError if no iterator could be created for the text
      * @stable ICU 2.8
      */
     public void setText(CharacterIterator newText) {
         final UCharacterIterator replacement = UCharacterIterator.getInstance(newText);
         if (replacement == null) {
             throw new InternalError("Could not create a new UCharacterIterator");
         }

         text = replacement;
         // Discard buffered output and restart from input index 0.
         clearBuffer();
         nextIndex=0;
         currentIndex=0;
     }

     //-------------------------------------------------------------------------
     // Private utility methods
     //-------------------------------------------------------------------------


     /* backward iteration --------------------------------------------------- */

     /*
      * Read one character backwards from src and return its norm32 data.
      *
      * Returns 0 if nothing could be read, if the character is <minC, if it
      * is an unpaired surrogate, or if the lead surrogate's data has no bits
      * in common with mask.
      *
      * On return (unless the iterator was already exhausted, in which case
      * chars is left untouched) chars[0] holds the code unit that was read
      * first. chars[1] is non-zero only if (chars[1], chars[0]) is a complete
      * surrogate pair (reversed - chars[1] is the first surrogate but was read
      * second!). For every unpaired surrogate chars[1] is 0, so that callers
      * which copy chars[1] whenever it is non-zero never copy a code unit
      * that was not consumed as part of this character.
      */

     private static  long getPrevNorm32(UCharacterIterator src,
                                        int/*unsigned*/ minC,
                                        int/*unsigned*/ mask,
                                        char[] chars) {
         /* need src.hasPrevious() */
         final int ch=src.previous();
         if(ch == UCharacterIterator.DONE) {
             return 0;
         }
         chars[0]=(char)ch;
         chars[1]=0;

         /* check for a surrogate before getting norm32 to see if we need to
          * predecrement further */
         if(chars[0]<minC) {
             return 0;
         } else if(!UTF16.isSurrogate(chars[0])) {
             return NormalizerImpl.getNorm32(chars[0]);
         } else if(UTF16.isLeadSurrogate(chars[0]) || (src.getIndex()==0)) {
             /* unpaired surrogate: only chars[0] was consumed, chars[1] stays 0 */
             return 0;
         }

         /* chars[0] is a trail surrogate with something in front of it */
         final char before=(char)src.previous();
         if(UTF16.isLeadSurrogate(before)) {
             chars[1]=before;
             final long norm32=NormalizerImpl.getNorm32(before);
             if((norm32&mask)==0) {
                 /* all surrogate pairs with this lead surrogate have irrelevant
                  * data */
                 return 0;
             } else {
                 /* norm32 must be a surrogate special */
                 return NormalizerImpl.getNorm32FromSurrogatePair(norm32,chars[0]);
             }
         } else {
             /* unpaired second surrogate: undo the extra src.previous()
              * movement and report a single unit (chars[1] stays 0) */
             src.moveIndex( 1);
             return 0;
         }
     }

     /*
      * Strategy used by backward iteration: an implementation reads one
      * character backwards from src, stores its code unit(s) in chars, and
      * reports whether that character is an iteration boundary.
      */
     private interface IsPrevBoundary{
         boolean isPrevBoundary(UCharacterIterator src,
                                int/*unsigned*/ minC,
                                int/*unsigned*/ mask,
                                char[] chars);
     }
     private static final class IsPrevNFDSafe implements IsPrevBoundary{
         /*
          * for NF*D:
          * read one character backwards and let NormalizerImpl.isNFDSafe
          * decide from its norm32 data whether it is a boundary
          * if chars[1]!=0 then (chars[1], chars[0]) is a surrogate pair
          * (reversed - chars[1] is first surrogate but read second!)
          */
         public boolean isPrevBoundary(UCharacterIterator src,
                                       int/*unsigned*/ minC,
                                       int/*unsigned*/ ccOrQCMask,
                                       char[] chars) {
             final long norm32 = getPrevNorm32(src, minC, ccOrQCMask, chars);
             return NormalizerImpl.isNFDSafe(norm32,
                                             ccOrQCMask,
                                             ccOrQCMask& NormalizerImpl.QC_MASK);
         }
     }

     private static final class IsPrevTrueStarter implements IsPrevBoundary{
         /*
          * read one character backwards and let NormalizerImpl.isTrueStarter
          * decide whether it is (or its decomposition begins with) a
          * "true starter" (cc==0 and NF*C_YES)
          * if chars[1]!=0 then (chars[1], chars[0]) is a surrogate pair
          * (reversed - chars[1] is first surrogate but read second!)
          */
         public boolean isPrevBoundary(UCharacterIterator src,
                                       int/*unsigned*/ minC,
                                       int/*unsigned*/ ccOrQCMask,
                                       char[] chars) {
             /* decomposition quick check mask */
             final int/*unsigned*/ decompQCMask=(ccOrQCMask<<2)&0xf;
             return NormalizerImpl.isTrueStarter(
                        getPrevNorm32(src, minC, ccOrQCMask|decompQCMask, chars),
                        ccOrQCMask,
                        decompQCMask);
         }
     }

     /*
      * Legacy entry point, kept so that any existing caller keeps compiling.
      * It behaves as before: text is collected into the caller's buffer while
      * it fits, but if the buffer has to grow the caller cannot see the
      * replacement array. Callers that must handle long segments should use
      * the char[][] overload below.
      */
     private static int findPreviousIterationBoundary(UCharacterIterator src,
                                                      IsPrevBoundary obj,
                                                      int/*unsigned*/ minC,
                                                      int/*mask*/ mask,
                                                      char[] buffer,
                                                      int[] startIndex) {
         return findPreviousIterationBoundary(src, obj, minC, mask,
                                              new char[][] { buffer },
                                              startIndex);
     }

     /*
      * Read backwards from src until obj reports a boundary (or the start of
      * the text is reached) and collect the characters read, right-aligned,
      * in bufferHolder[0].
      *
      * bufferHolder[0] is the buffer to fill; if it is too small it is
      * replaced by a larger array, and bufferHolder[0] is updated so the
      * caller reads the collected text from the right array.
      * startIndex[0] receives the index of the first collected char in
      * bufferHolder[0].
      * Returns the number of chars collected.
      */
     private static int findPreviousIterationBoundary(UCharacterIterator src,
                                                      IsPrevBoundary obj,
                                                      int/*unsigned*/ minC,
                                                      int/*mask*/ mask,
                                                      char[][] bufferHolder,
                                                      int[] startIndex) {
         char[] buffer=bufferHolder[0];
         char[] chars=new char[2];
         boolean isBoundary;

         /* fill the buffer from the end backwards */
         startIndex[0] = buffer.length;
         chars[0]=0;
         /* the chars[0] test is kept from the original loop condition */
         while(src.getIndex()>0 && chars[0]!=UCharacterIterator.DONE) {
             isBoundary=obj.isPrevBoundary(src, minC, mask, chars);

             /* always write this character to the front of the buffer */
             /* make sure there is enough space in the buffer */
             if(startIndex[0] < (chars[1]==0 ? 1 : 2)) {

                 // grow the buffer
                 char[] newBuf = new char[buffer.length*2];
                 int used = buffer.length-startIndex[0];
                 /* move the current buffer contents up */
                 System.arraycopy(buffer,startIndex[0],newBuf,
                                  newBuf.length-used,
                                  used);
                 //adjust the startIndex
                 startIndex[0]+=newBuf.length-buffer.length;

                 buffer=newBuf;
                 // publish the replacement so the caller does not keep
                 // reading the old, smaller array
                 bufferHolder[0]=newBuf;
             }

             buffer[--startIndex[0]]=chars[0];
             if(chars[1]!=0) {
                 buffer[--startIndex[0]]=chars[1];
             }

             /* stop if this just-copied character is a boundary */
             if(isBoundary) {
                 break;
             }
         }

         /* return the length of the buffer contents */
         return buffer.length-startIndex[0];
     }

     /*
      * Read the text segment that precedes the current position of src and
      * write it (normalized if doNormalize is true, otherwise copied) to
      * dest[destStart..destLimit).
      *
      * If the mode supplies no previous-boundary test, exactly one code point
      * is read backwards and copied as far as it fits.
      *
      * pNeededToNormalize, if not null, is reset to false and then set from
      * the comparison of the segment with its normalized form.
      * Returns the length written by the normalization (or by the single code
      * point path); on the plain-copy path the return value remains 0, as
      * before.
      */
     private static int previous(UCharacterIterator src,
                                 char[] dest, int destStart, int destLimit,
                                 Mode mode,
                                 boolean doNormalize,
                                 boolean[] pNeededToNormalize,
                                 int options) {

         IsPrevBoundary isPreviousBoundary;
         int destLength, bufferLength;
         int/*unsigned*/ mask;
         int c,c2;

         char minC;
         int destCapacity = destLimit-destStart;
         destLength=0;

         if(pNeededToNormalize!=null) {
             pNeededToNormalize[0]=false;
         }
         minC = (char)mode.getMinC();
         mask = mode.getMask();
         isPreviousBoundary = mode.getPrevBoundary();

         if(isPreviousBoundary==null) {
             destLength=0;
             if((c=src.previous())>=0) {
                 destLength=1;
                 if(UTF16.isTrailSurrogate((char)c)) {
                     c2= src.previous();
                     if(c2!= UCharacterIterator.DONE) {
                         if(UTF16.isLeadSurrogate((char)c2)) {
                             if(destCapacity>=2) {
                                 // write relative to destStart, not index 0
                                 dest[destStart+1]=(char)c; // trail surrogate
                                 destLength=2;
                             }
                             // lead surrogate to be written below
                             c=c2;
                         } else {
                             src.moveIndex(1);
                         }
                     }
                 }

                 if(destCapacity>0) {
                     dest[destStart]=(char)c;
                 }
             }
             return destLength;
         }

         // The holder lets the helper hand back a larger array when the
         // segment does not fit into the initial 100 chars.
         char[][] bufferHolder = { new char[100] };
         int[] startIndex= new int[1];
         bufferLength=findPreviousIterationBoundary(src,
                                                    isPreviousBoundary,
                                                    minC, mask,bufferHolder,
                                                    startIndex);
         char[] buffer = bufferHolder[0];
         if(bufferLength>0) {
             if(doNormalize) {
                 destLength=NormalizerBase.normalize(buffer,startIndex[0],
                                                 startIndex[0]+bufferLength,
                                                 dest, destStart,destLimit,
                                                 mode, options);

                 if(pNeededToNormalize!=null) {
                     // Compare the collected segment (which starts at
                     // startIndex[0]) with the destLength chars just written.
                     // NOTE(review): arrayRegionMatches presumably returns
                     // true when the regions are equal, which would make this
                     // flag the opposite of its name when lengths match -
                     // confirm against Utility before relying on it.
                     pNeededToNormalize[0]=destLength!=bufferLength ||
                                           Utility.arrayRegionMatches(
                                                   buffer,startIndex[0],
                                                   dest,destStart,
                                                   destLength);
                 }
             } else {
                 /* just copy the source characters */
                 if(destCapacity>0) {
                     System.arraycopy(buffer,startIndex[0],dest,destStart,
                                      Math.min(bufferLength,destCapacity)
                                      );
                 }
             }
         }


         return destLength;
     }


     /* forward iteration ---------------------------------------------------- */
     /*
      * Strategy used by forward iteration:
      * read forward and check if the character is a next-iteration boundary
      * The code unit(s) read are stored in chars;
      * if chars[1]!=0 then (chars[0], chars[1]) is a surrogate pair
      */
     private interface IsNextBoundary{
         boolean isNextBoundary(UCharacterIterator src,
                                int/*unsigned*/ minC,
                                int/*unsigned*/ mask,
                                int[] chars);
     }
     /*
      * Read one character forward from src and return its norm32 data.
      *
      * Returns 0 if the character is <minC, if it is an unmatched lead
      * surrogate, or if the lead surrogate's data has no bits in common
      * with mask.
      *
      * chars[0] receives the first code unit read. chars[1] is non-zero only
      * if (chars[0], chars[1]) is a complete surrogate pair, in which case
      * both units have been consumed. For an unmatched lead surrogate
      * chars[1] stays 0 and only one unit has been consumed, so callers that
      * use chars[1] to decide how many units were read stay in step with the
      * iterator.
      */
     private static long /*unsigned*/ getNextNorm32(UCharacterIterator src,
                                                    int/*unsigned*/ minC,
                                                    int/*unsigned*/ mask,
                                                    int[] chars) {
         long norm32;

         /* need src.hasNext() to be true */
         chars[0]=src.next();
         chars[1]=0;

         if(chars[0]<minC) {
             return 0;
         }

         norm32=NormalizerImpl.getNorm32((char)chars[0]);
         if(UTF16.isLeadSurrogate((char)chars[0])) {
             /* peek at the following unit without recording it yet */
             final int following=src.current();
             if(following!=UCharacterIterator.DONE &&
                UTF16.isTrailSurrogate((char)following)) {
                 chars[1]=following;
                 src.moveIndex(1); /* skip the c2 surrogate */
                 if((norm32&mask)==0) {
                     /* irrelevant data */
                     return 0;
                 } else {
                     /* norm32 must be a surrogate special */
                     return NormalizerImpl.getNorm32FromSurrogatePair(norm32,(char)chars[1]);
                 }
             } else {
                 /* unmatched surrogate: chars[1] stays 0, nothing extra consumed */
                 return 0;
             }
         }
         return norm32;
     }


     /*
      * for NF*D:
      * read one character forward and let NormalizerImpl.isNFDSafe decide
      * from its norm32 data whether it is a boundary
      * if chars[1]!=0 then (chars[0], chars[1]) is a surrogate pair
      */
     private static final class IsNextNFDSafe implements IsNextBoundary{
         public boolean isNextBoundary(UCharacterIterator src,
                                       int/*unsigned*/ minC,
                                       int/*unsigned*/ ccOrQCMask,
                                       int[] chars) {
             final long norm32 = getNextNorm32(src,minC,ccOrQCMask,chars);
             return NormalizerImpl.isNFDSafe(norm32,
                                             ccOrQCMask,
                                             ccOrQCMask&NormalizerImpl.QC_MASK);
         }
     }

     /*
      * for NF*C:
      * read one character forward and let NormalizerImpl.isTrueStarter decide
      * whether it is (or its decomposition begins with) a "true starter"
      * (cc==0 and NF*C_YES)
      * if chars[1]!=0 then (chars[0], chars[1]) is a surrogate pair
      */
     private static final class IsNextTrueStarter implements IsNextBoundary{
         public boolean isNextBoundary(UCharacterIterator src,
                                       int/*unsigned*/ minC,
                                       int/*unsigned*/ ccOrQCMask,
                                       int[] chars) {
             /* decomposition quick check mask */
             final int/*unsigned*/ decompQCMask=(ccOrQCMask<<2)&0xf;
             return NormalizerImpl.isTrueStarter(
                        getNextNorm32(src, minC, ccOrQCMask|decompQCMask, chars),
                        ccOrQCMask,
                        decompQCMask);
         }
     }

     /*
      * Legacy entry point, kept so that any existing caller keeps compiling.
      * It behaves as before: text is collected into the caller's buffer while
      * it fits, but if the buffer has to grow the caller cannot see the
      * replacement array. Callers that must handle long segments should use
      * the char[][] overload below.
      */
     private static int findNextIterationBoundary(UCharacterIterator src,
                                                  IsNextBoundary obj,
                                                  int/*unsigned*/ minC,
                                                  int/*unsigned*/ mask,
                                                  char[] buffer) {
         return findNextIterationBoundary(src, obj, minC, mask,
                                          new char[][] { buffer });
     }

     /*
      * Read forward from src and collect, starting at index 0 of
      * bufferHolder[0], the first character plus all following characters up
      * to (not including) the next one that obj reports as a boundary. The
      * iterator is left at that boundary.
      *
      * bufferHolder[0] is the buffer to fill; if it is too small it is
      * replaced by a larger array, and bufferHolder[0] is updated so the
      * caller reads the collected text from the right array.
      * Returns the number of chars collected (0 if src is at its end).
      */
     private static int findNextIterationBoundary(UCharacterIterator src,
                                                  IsNextBoundary obj,
                                                  int/*unsigned*/ minC,
                                                  int/*unsigned*/ mask,
                                                  char[][] bufferHolder) {
         if(src.current()==UCharacterIterator.DONE) {
             return 0;
         }

         char[] buffer = bufferHolder[0];

         /* get one character and ignore its properties */
         int[] chars = new int[2];
         chars[0]=src.next();
         buffer[0]=(char)chars[0];
         int bufferIndex = 1;

         if(UTF16.isLeadSurrogate((char)chars[0])&&
            src.current()!=UCharacterIterator.DONE) {
             if(UTF16.isTrailSurrogate((char)(chars[1]=src.next()))) {
                 buffer[bufferIndex++]=(char)chars[1];
             } else {
                 src.moveIndex(-1); /* back out the non-trail-surrogate */
             }
         }

         /* get all following characters until we see a boundary */
         /* checking hasNext() instead of c!=DONE on the off-chance that U+ffff
          * is part of the string */
         while( src.current()!=UCharacterIterator.DONE) {
             if(obj.isNextBoundary(src, minC, mask, chars)) {
                 /* back out the latest movement to stop at the boundary */
                 src.moveIndex(chars[1]==0 ? -1 : -2);
                 break;
             }

             if(bufferIndex+(chars[1]==0 ? 1 : 2)>buffer.length) {
                 // grow the buffer and publish the replacement so the
                 // caller does not keep reading the old, smaller array
                 char[] newBuf = new char[buffer.length*2];
                 System.arraycopy(buffer,0,newBuf,0,bufferIndex);
                 buffer = newBuf;
                 bufferHolder[0] = newBuf;
             }
             buffer[bufferIndex++]=(char)chars[0];
             if(chars[1]!=0) {
                 buffer[bufferIndex++]=(char)chars[1];
             }
         }

         /* return the length of the buffer contents */
         return bufferIndex;
     }

     /*
      * Read the text segment that follows the current position of src and
      * write it (normalized if doNormalize is true, otherwise copied) to
      * dest[destStart..destLimit).
      *
      * If the mode supplies no next-boundary test, exactly one code point is
      * read and copied as far as it fits.
      *
      * pNeededToNormalize, if not null, is reset to false and then set from
      * the comparison of the segment with its normalized form.
      * Returns the length reported by the normalization (or by the single
      * code point path); on the plain-copy path the return value remains 0,
      * as before.
      */
     private static int next(UCharacterIterator src,
                             char[] dest, int destStart, int destLimit,
                             NormalizerBase.Mode mode,
                             boolean doNormalize,
                             boolean[] pNeededToNormalize,
                             int options) {

         IsNextBoundary isNextBoundary;
         int /*unsigned*/ mask;
         int /*unsigned*/ bufferLength;
         int c,c2;
         char minC;
         int destCapacity = destLimit - destStart;
         int destLength = 0;
         if(pNeededToNormalize!=null) {
             pNeededToNormalize[0]=false;
         }

         minC = (char)mode.getMinC();
         mask = mode.getMask();
         isNextBoundary = mode.getNextBoundary();

         if(isNextBoundary==null) {
             destLength=0;
             c=src.next();
             if(c!=UCharacterIterator.DONE) {
                 destLength=1;
                 if(UTF16.isLeadSurrogate((char)c)) {
                     c2= src.next();
                     if(c2!= UCharacterIterator.DONE) {
                         if(UTF16.isTrailSurrogate((char)c2)) {
                             if(destCapacity>=2) {
                                 // write relative to destStart, not index 0
                                 dest[destStart+1]=(char)c2; // trail surrogate
                                 destLength=2;
                             }
                             // lead surrogate to be written below
                         } else {
                             src.moveIndex(-1);
                         }
                     }
                 }

                 if(destCapacity>0) {
                     dest[destStart]=(char)c;
                 }
             }
             return destLength;
         }

         // The holder lets the helper hand back a larger array when the
         // segment does not fit into the initial 100 chars.
         char[][] bufferHolder = { new char[100] };
         bufferLength=findNextIterationBoundary(src,isNextBoundary, minC, mask,
                                                bufferHolder);
         char[] buffer = bufferHolder[0];
         if(bufferLength>0) {
             if(doNormalize) {
                 /* the collected segment always starts at index 0 */
                 destLength=mode.normalize(buffer,0,bufferLength,
                                           dest,destStart,destLimit, options);

                 if(pNeededToNormalize!=null) {
                     // NOTE(review): arrayRegionMatches presumably returns
                     // true when the regions are equal, which would make this
                     // flag the opposite of its name when lengths match -
                     // confirm against Utility before relying on it.
                     pNeededToNormalize[0]=destLength!=bufferLength ||
                                           Utility.arrayRegionMatches(buffer,0,
                                                                      dest,destStart,
                                                                      destLength);
                 }
             } else {
                 /* just copy the source characters */
                 if(destCapacity>0) {
                     System.arraycopy(buffer,0,dest,destStart,
                                      Math.min(bufferLength,destCapacity)
                                      );
                 }
             }
         }
         return destLength;
     }

     /* Mark the output buffer as empty by zeroing its limit, start and position. */
     private void clearBuffer() {
         bufferLimit=bufferStart=bufferPos=0;
     }

     /*
      * Refill the output buffer with the normalized form of the next input
      * segment, starting at nextIndex. Updates currentIndex/nextIndex to the
      * input range that was consumed.
      * Returns true if any output was produced.
      */
     private boolean nextNormalize() {
         clearBuffer();

         // The new segment starts where the previous one ended.
         currentIndex=nextIndex;
         text.setIndex(nextIndex);

         final int produced=next(text,buffer,bufferStart,buffer.length,mode,true,null,options);
         bufferLimit=produced;
         nextIndex=text.getIndex();

         return produced>0;
     }

     /*
      * Refill the output buffer with the normalized form of the input segment
      * that ends at currentIndex, and position the buffer cursor at its end
      * so that the output can be read backwards. Updates
      * currentIndex/nextIndex to the input range that was consumed.
      * Returns true if any output was produced.
      */
     private boolean previousNormalize() {
         clearBuffer();

         // The new segment ends where the previous one started.
         nextIndex=currentIndex;
         text.setIndex(currentIndex);

         final int produced=previous(text,buffer,bufferStart,buffer.length,mode,true,null,options);
         bufferLimit=produced;
         currentIndex=text.getIndex();
         bufferPos=produced;

         return produced>0;
     }

     /*
      * Return the code point that the buffer char at index belongs to.
      * A lead surrogate is combined with a trail surrogate that follows it
      * inside the buffer limit; a trail surrogate is combined with a lead
      * surrogate directly before it. Anything else, including an unpaired
      * surrogate, is returned as the single char value.
      */
     private int getCodePointAt(int index) {
         final char unit = buffer[index];
         if(UTF16.isSurrogate(unit)) {
             if(UTF16.isLeadSurrogate(unit)) {
                 final int after = index+1;
                 if(after<bufferLimit && UTF16.isTrailSurrogate(buffer[after])) {
                     return UCharacterProperty.getRawSupplementary(unit, buffer[after]);
                 }
             } else if(UTF16.isTrailSurrogate(unit)
                       && index>0
                       && UTF16.isLeadSurrogate(buffer[index-1])) {
                 return UCharacterProperty.getRawSupplementary(buffer[index-1], unit);
             }
         }
         return unit;
     }

     /**
      * Internal API.
      * Delegates to <code>mode.isNFSkippable(c)</code>.
      *
      * @param c    the code point to test
      * @param mode the normalization mode whose answer is returned
      * @return the result of <code>mode.isNFSkippable(c)</code>
      * @internal
      */
     public static boolean isNFSkippable(int c, Mode mode) {
         return mode.isNFSkippable(c);
     }

     //
     // Options
     //

     /*
      * Default option for Unicode 3.2.0 normalization: UNICODE_3_2 combined
      * with the "without Corrigendum 4 corrections" and "before PRI #29"
      * flags of NormalizerImpl.
      * Corrigendum 4 was fixed in Unicode 3.2.0 but isn't supported in
      * IDNA/StringPrep.
      * The public review issue #29 was fixed in Unicode 4.1.0. Corrigendum 5
      * allowed Unicode 3.2 to 4.0.1 to apply the fix for PRI #29, but, like
      * Corrigendum 4, that fix isn't supported by IDNA/StringPrep.
      */
     public static final int UNICODE_3_2_0_ORIGINAL =
                                UNICODE_3_2 |
                                NormalizerImpl.WITHOUT_CORRIGENDUM4_CORRECTIONS |
                                NormalizerImpl.BEFORE_PRI_29;

     /*
      * Default option for the latest Unicode normalization. This option is
      * provided mainly for testing.
      * The value zero means that normalization is done with the fixes for
      *   - Corrigendum 4 (Five CJK Canonical Mapping Errors)
      *   - Corrigendum 5 (Normalization Idempotency)
      */
     public static final int UNICODE_LATEST = 0x00;

     //
     // public constructor and methods for java.text.Normalizer and
     // sun.text.Normalizer
     //

     /**
      * Creates a new <tt>Normalizer</tt> object for iterating over the
      * normalized form of a given string, using the
      * <code>UNICODE_LATEST</code> options.
      *
      * @param str  The string to be normalized.  The normalization
      *              will start at the beginning of the string.
      *
      * @param mode The normalization mode.
      */
     public NormalizerBase(String str, Mode mode) {
           this(str, mode, UNICODE_LATEST);
     }

     /**
      * Normalizes a <code>String</code> using the given normalization form
      * and the <code>UNICODE_LATEST</code> options.
      *
      * @param str      the input string to be normalized.
      * @param form     the normalization form
      * @return the result of <code>normalize(str, form, UNICODE_LATEST)</code>
      */
     public static String normalize(String str, Normalizer.Form form) {
         return normalize(str, form, UNICODE_LATEST);
     }

     /**
      * Normalizes a <code>String</code> using the given normalization form.
      * A string that contains only chars up to 127 is returned unchanged
      * (the same instance) for every supported form.
      *
      * @param str      the input string to be normalized.
      * @param form     the normalization form
      * @param options   the optional features to be enabled.
      * @return the normalized string
      * @throws IllegalArgumentException if the form is not one of
      *         NFC, NFD, NFKC or NFKD
      */
     public static String normalize(String str, Normalizer.Form form, int options) {
         final int len = str.length();

         // Scan for the first char above 127. Short strings are read in
         // place; longer ones are copied to an array first.
         boolean asciiOnly = true;
         if (len < 80) {
             int i = 0;
             while (asciiOnly && i < len) {
                 asciiOnly = str.charAt(i) <= 127;
                 i++;
             }
         } else {
             for (char unit : str.toCharArray()) {
                 if (unit > 127) {
                     asciiOnly = false;
                     break;
                 }
             }
         }

         if (asciiOnly) {
             // Every supported form leaves such text untouched.
             switch (form) {
             case NFC :
             case NFD :
             case NFKC :
             case NFKD :
                 return str;
             }
         } else {
             switch (form) {
             case NFC :
                 return NFC.normalize(str, options);
             case NFD :
                 return NFD.normalize(str, options);
             case NFKC :
                 return NFKC.normalize(str, options);
             case NFKD :
                 return NFKD.normalize(str, options);
             }
         }

         throw new IllegalArgumentException("Unexpected normalization form: " +
                                            form);
     }

     /**
      * Test if a string is in a given normalization form.
      * This is semantically equivalent to str.equals(normalize(str, form)).
      *
      * Unlike quickCheck(), this function returns a definitive result,
      * never a "maybe".
      * For NFD, NFKD, and FCD, both functions work exactly the same.
      * For NFC and NFKC where quickCheck may return "maybe", this function will
      * perform further tests to arrive at a true/false result.
      * <p>
      * Equivalent to calling
      * {@link #isNormalized(String, Normalizer.Form, int)} with
      * {@link #UNICODE_LATEST} as the options value.
      *
      * @param str       the input string to be checked to see if it is normalized
      * @param form      the normalization form
      * @return          the value produced by the three-argument overload
      *                  for the same string and form
      */
     public static boolean isNormalized(String str, Normalizer.Form form) {
         return isNormalized(str, form, UNICODE_LATEST);
     }

     /**
      * Test if a string is in a given normalization form.
      * This is semantically equivalent to str.equals(normalize(str, form)).
      *
      * Unlike quickCheck(), this function returns a definitive result,
      * never a "maybe": the quick check of the mode matching
      * <code>form</code> is run over the whole string and the answer is
      * <code>true</code> only when that check reports YES.
      *
      * @param str       the input string to be checked to see if it is normalized
      * @param form      the normalization form
      * @param options   the optional features to be enabled.
      * @return          <code>true</code> if the selected mode's quick check
      *                  yields YES for <code>str</code>, <code>false</code>
      *                  otherwise
      * @throws NullPointerException if <code>str</code> or
      *                  <code>form</code> is <code>null</code>
      * @throws IllegalArgumentException if <code>form</code> is not one of
      *                  NFC, NFD, NFKC or NFKD
      */
     public static boolean isNormalized(String str, Normalizer.Form form, int options) {
         // Pick the mode first; nothing else is evaluated until the form has
         // been resolved.
         final Mode checker;
         switch (form) {
         case NFC:
             checker = NFC;
             break;
         case NFD:
             checker = NFD;
             break;
         case NFKC:
             checker = NFKC;
             break;
         case NFKD:
             checker = NFKD;
             break;
         default:
             throw new IllegalArgumentException("Unexpected normalization form: " +
                                                form);
         }

         final char[] chars = str.toCharArray();
         final int limit = str.length();
         return checker.quickCheck(chars, 0, limit, false,
                                   NormalizerImpl.getNX(options)) == YES;
     }
 }