// src/java.base/share/classes/sun/text/normalizer/UCharacterProperty.java - platform/libcore - Git at Google

 /*
  * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 only, as
  * published by the Free Software Foundation.  Oracle designates this
  * particular file as subject to the "Classpath" exception as provided
  * by Oracle in the LICENSE file that accompanied this code.
  *
  * This code is distributed in the hope that it will be useful, but WITHOUT
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  * version 2 for more details (a copy is included in the LICENSE file that
  * accompanied this code).
  *
  * You should have received a copy of the GNU General Public License version
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  * or visit www.oracle.com if you need additional information or have any
  * questions.
  */
 /*
  *******************************************************************************
  * Copyright (C) 1996-2014, International Business Machines Corporation and
  * others. All Rights Reserved.
  *******************************************************************************
  */

 package sun.text.normalizer;

 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.Iterator;
 import java.util.MissingResourceException;

 import sun.text.normalizer.UCharacter.HangulSyllableType;
 import sun.text.normalizer.UCharacter.NumericType;

 /**
 * <p>Internal class used for the Unicode character property database.</p>
 * <p>This class stores binary data read from uprops.icu.
 * It does not have the capability to parse the data into more high-level
 * information. It only returns bytes of information when required.</p>
 * <p>Due to the form most commonly used for retrieval, an array of char is used
 * to store the binary data.</p>
 * <p>UCharacterProperty also contains information on accessing indexes to
 * significant points in the binary data.</p>
 * <p>Responsibility for molding the binary data into a more meaningful form lies with
 * <a href="UCharacter.html">UCharacter</a>.</p>
 * @author Syn Wee Quek
 * @since release 2.1, february 1st 2002
 */

 final class UCharacterProperty
 {
     // public data members -----------------------------------------------

     /*
      * public singleton instance
      */
     public static final UCharacterProperty INSTANCE;

     /**
     * Trie data
     */
     public Trie2_16 m_trie_;

     /**
     * Unicode version
     */
     public VersionInfo m_unicodeVersion_;

     /**
     * Character type mask
     */
     public static final int TYPE_MASK = 0x1F;

     // uprops.h enum UPropertySource --------------------------------------- ***

     /** From uchar.c/uprops.icu main trie */
     public static final int SRC_CHAR=1;
     /** From uchar.c/uprops.icu properties vectors trie */
     public static final int SRC_PROPSVEC=2;
     /** From ubidi_props.c/ubidi.icu */
     public static final int SRC_BIDI=5;
     /** From normalizer2impl.cpp/nfc.nrm */
     public static final int SRC_NFC=8;
     /** From normalizer2impl.cpp/nfkc.nrm */
     public static final int SRC_NFKC=9;

     // public methods ----------------------------------------------------

     /**
      * Looks up the main property word stored for a code point.
      *
      * @param ch code point whose property value is to be retrieved
      * @return the value the main trie ({@code m_trie_}) holds for {@code ch}
      */
     public final int getProperty(int ch) {
         final int props = m_trie_.get(ch);
         return props;
     }

     /**
      * Reads one word of the additional (properties vectors) data for a code point.
      * Java version of C u_getUnicodeProperties().
      *
      * @param codepoint code point whose additional properties are to be retrieved
      * @param column    the column index; must not be negative (checked by assertion only)
      * @return the vector word at {@code column} in the row selected by the
      *         additional trie, or 0 when {@code column} is not below the
      *         number of additional columns
      */
     public int getAdditional(int codepoint, int column) {
         assert column >= 0;
         if (column < m_additionalColumnsCount_) {
             // the additional trie yields the index of the row's first word
             final int rowStart = m_additionalTrie_.get(codepoint);
             return m_additionalVectors_[rowStart + column];
         }
         return 0;
     }

     /**
      * <p>Returns the "age" of a code point.</p>
      * <p>The "age" is the Unicode version in which the code point was first
      * designated (as a non-character or for Private Use) or assigned a
      * character. This can be useful to avoid emitting code points to
      * receiving processes that do not accept newer characters.</p>
      * <p>The data is from the UCD file DerivedAge.txt.</p>
      * <p>The validity of the code point is not checked.</p>
      *
      * @param codepoint the code point
      * @return the Unicode version, built from the major and minor nibbles
      *         stored in the top byte of vector word 0; the remaining two
      *         version fields are 0
      */
     public VersionInfo getAge(int codepoint) {
         final int packed = getAdditional(codepoint, 0) >> AGE_SHIFT_;
         final int major = (packed >> FIRST_NIBBLE_SHIFT_) & LAST_NIBBLE_MASK_;
         final int minor = packed & LAST_NIBBLE_MASK_;
         return VersionInfo.getInstance(major, minor, 0, 0);
     }

     // int-value and enumerated properties --------------------------------- ***

     /**
      * Extracts the character type bits of a code point.
      *
      * @param c code point
      * @return the main property word of {@code c} masked with {@link #TYPE_MASK}
      */
     public int getType(int c) {
         final int props = getProperty(c);
         return props & TYPE_MASK;
     }

     /*
      * Map some of the Grapheme Cluster Break values to Hangul Syllable Types.
      * Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break.
      */
     private static final int /* UHangulSyllableType */ gcbToHst[]={
         HangulSyllableType.NOT_APPLICABLE,   /* U_GCB_OTHER */
         HangulSyllableType.NOT_APPLICABLE,   /* U_GCB_CONTROL */
         HangulSyllableType.NOT_APPLICABLE,   /* U_GCB_CR */
         HangulSyllableType.NOT_APPLICABLE,   /* U_GCB_EXTEND */
         HangulSyllableType.LEADING_JAMO,     /* U_GCB_L */
         HangulSyllableType.NOT_APPLICABLE,   /* U_GCB_LF */
         HangulSyllableType.LV_SYLLABLE,      /* U_GCB_LV */
         HangulSyllableType.LVT_SYLLABLE,     /* U_GCB_LVT */
         HangulSyllableType.TRAILING_JAMO,    /* U_GCB_T */
         HangulSyllableType.VOWEL_JAMO        /* U_GCB_V */
         /*
          * Omit GCB values beyond what we need for hst.
          * The code below checks for the array length.
          */
     };

     /**
      * Descriptor of an integer-valued property. By default the value is a
      * bit field of an additional-properties vector word, selected by
      * {@code mask} and moved down by {@code shift}.
      */
     private class IntProperty {
         int column;  // SRC_PROPSVEC column, or "source" if mask==0
         int mask;
         int shift;

         /** Creates a property stored directly in a vector column. */
         IntProperty(int column, int mask, int shift) {
             this.column = column;
             this.mask = mask;
             this.shift = shift;
         }

         /** Creates a property identified only by its source; mask and shift are 0. */
         IntProperty(int source) {
             this(source, 0, 0);
         }

         /**
          * @param c code point
          * @return the masked and unsigned-shifted bits of the vector word for {@code c}
          */
         int getValue(int c) {
             // systematic, directly stored properties
             final int word = getAdditional(c, column);
             return (word & mask) >>> shift;
         }
     }

     /** Integer property whose source is {@link #SRC_BIDI}. */
     private class BiDiIntProperty extends IntProperty {

         BiDiIntProperty() {
             super(SRC_BIDI);
         }

     }

     /** Integer property identified only by the given source; adds no behavior of its own. */
     private class CombiningClassIntProperty extends IntProperty {

         CombiningClassIntProperty(int source) {
             super(source);
         }

     }

     /**
      * Descriptor for the UCHAR_NF*_QUICK_CHECK properties: records the
      * property selector and its maximum value alongside the source.
      */
     private class NormQuickCheckIntProperty extends IntProperty {
         int which;
         int max;

         NormQuickCheckIntProperty(int source, int which, int max) {
             super(source);
             this.max = max;
             this.which = which;
         }
     }

     /**
      * Handler for {@link #BIDI_PAIRED_BRACKET_TYPE}: the value is not read from
      * the properties vectors but delegated to
      * {@code UBiDiProps.INSTANCE.getPairedBracketType(int)}.
      * The field is assigned once here and never reassigned.
      */
     private final IntProperty intProp = new BiDiIntProperty() {  // BIDI_PAIRED_BRACKET_TYPE
         @Override
         int getValue(int c) {
             return UBiDiProps.INSTANCE.getPairedBracketType(c);
         }
     };

     /**
      * Returns the value of an integer property for a code point.
      * Only {@link #BIDI_PAIRED_BRACKET_TYPE} is handled here.
      *
      * @param c     code point
      * @param which property selector
      * @return the property value, or 0 (undefined) for any other selector
      */
     public int getIntPropertyValue(int c, int which) {
         return (which == BIDI_PAIRED_BRACKET_TYPE)
                 ? intProp.getValue(c)
                 : 0; // undefined
     }

     /**
      * Combines a surrogate pair into a supplementary code point.<br>
      * This is for internal use, so the arguments are not checked for being
      * valid surrogates.
      *
      * @param lead  lead surrogate character
      * @param trail trailing surrogate character
      * @return code point of the supplementary character
      */
     public static int getRawSupplementary(char lead, char trail) {
         final int shiftedLead = lead << LEAD_SURROGATE_SHIFT_;
         return shiftedLead + trail + SURROGATE_OFFSET_;
     }

     /**
      * Converts a character type into its single-bit mask.
      *
      * @param type character type, used as a bit number
      * @return {@code 1 << type}
      */
     public static final int getMask(int type) {
         final int bit = 1 << type;
         return bit;
     }

     /**
      * Returns the digit value of a Latin letter used as a digit beyond 9:
      * 'A'-'Z' and 'a'-'z' (U+0041..U+005A, U+0061..U+007A) and their
      * full-width forms (U+FF21..U+FF3A, U+FF41..U+FF5A) map to 10..35.
      * This method assumes that the other digit characters are checked by
      * the calling method.
      *
      * @param ch character to test
      * @return 10..35 for a letter in one of the four ranges, otherwise -1
      */
     public static int getEuropeanDigit(int ch) {
         if (ch >= 0x41 && ch <= 0x5a) {          // 'A'..'Z'
             return ch - 0x41 + 10;
         }
         if (ch >= 0x61 && ch <= 0x7a) {          // 'a'..'z'
             return ch - 0x61 + 10;
         }
         if (ch >= 0xff21 && ch <= 0xff3a) {      // full-width upper case
             return ch - 0xff21 + 10;
         }
         if (ch >= 0xff41 && ch <= 0xff5a) {      // full-width lower case
             return ch - 0xff41 + 10;
         }
         return -1;
     }

     /**
      * Returns the decimal digit value stored for a code point.
      *
      * @param c code point
      * @return the numeric type/value of {@code c} minus NTV_DECIMAL_START_
      *         when that difference is at most 9, otherwise -1
      */
     public int digit(int c) {
         final int ntv = getNumericTypeValue(getProperty(c));
         final int value = ntv - NTV_DECIMAL_START_;
         return (value <= 9) ? value : -1;
     }

     // protected variables -----------------------------------------------

     /**
      * Extra property trie
      */
     Trie2_16 m_additionalTrie_;
     /**
      * Extra property vectors, 1st column for age and second for binary
      * properties.
      */
     int m_additionalVectors_[];
     /**
      * Number of additional columns
      */
     int m_additionalColumnsCount_;
     /**
      * Maximum values for block, bits used as in vector word 0
      */
     int m_maxBlockScriptValue_;
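     /**
      * Maximum values for script, bits used as in vector word 0
      * (NOTE(review): the field name says "JTG", not "script" - confirm which
      * properties this index word actually describes)
      */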
      int m_maxJTGValue_;
     /**
      * Script_Extensions data
      */
     public char[] m_scriptExtensions_;

     // private variables -------------------------------------------------

     /**
     * Default name of the datafile
     */
     private static final String DATA_FILE_NAME_ = "/sun/text/resources/uprops.icu";

     /**
     * Shift value for lead surrogate to form a supplementary character.
     */
     private static final int LEAD_SURROGATE_SHIFT_ = 10;
     /**
     * Offset to add to combined surrogate pair to avoid masking.
     */
     private static final int SURROGATE_OFFSET_ =
                            UTF16.SUPPLEMENTARY_MIN_VALUE -
                            (UTF16.SURROGATE_MIN_VALUE <<
                            LEAD_SURROGATE_SHIFT_) -
                            UTF16.TRAIL_SURROGATE_MIN_VALUE;


     // property data constants -------------------------------------------------

     /**
      * Numeric types and values in the main properties words.
      */
     private static final int NUMERIC_TYPE_VALUE_SHIFT_ = 6;
     /**
      * Extracts the numeric type/value field from a main properties word.
      *
      * @param props main properties word
      * @return {@code props} shifted right (sign-propagating) by NUMERIC_TYPE_VALUE_SHIFT_
      */
     private static final int getNumericTypeValue(int props) {
         final int ntv = props >> NUMERIC_TYPE_VALUE_SHIFT_;
         return ntv;
     }

     /* constants for the storage form of numeric types and values */
     /** No numeric value. */
     private static final int NTV_NONE_ = 0;
     /** Decimal digits: nv=0..9 */
     private static final int NTV_DECIMAL_START_ = 1;
     /** Other digits: nv=0..9 */
     private static final int NTV_DIGIT_START_ = 11;
     /** Small integers: nv=0..154 */
     private static final int NTV_NUMERIC_START_ = 21;

     /**
      * Maps a stored numeric type/value to its numeric type.
      *
      * @param ntv stored numeric type/value
      * @return NumericType.NONE for NTV_NONE_, DECIMAL below NTV_DIGIT_START_,
      *         DIGIT below NTV_NUMERIC_START_, otherwise NUMERIC
      */
     private static final int ntvGetType(int ntv) {
         if (ntv == NTV_NONE_) {
             return NumericType.NONE;
         }
         if (ntv < NTV_DIGIT_START_) {
             return NumericType.DECIMAL;
         }
         if (ntv < NTV_NUMERIC_START_) {
             return NumericType.DIGIT;
         }
         return NumericType.NUMERIC;
     }

     /*
      * Properties in vector word 0
      * Bits
      * 31..24   DerivedAge version major/minor one nibble each
      * 23..22   3..1: Bits 7..0 = Script_Extensions index
      *             3: Script value from Script_Extensions
      *             2: Script=Inherited
      *             1: Script=Common
      *             0: Script=bits 7..0
      * 21..20   reserved
      * 19..17   East Asian Width
      * 16.. 8   UBlockCode
      *  7.. 0   UScriptCode
      */
     /**
      * Script_Extensions: mask includes Script
      */
     public static final int SCRIPT_X_MASK = 0x00c000ff;
     //private static final int SCRIPT_X_SHIFT = 22;
     /**
      * Integer properties mask and shift values for East Asian cell width.
      * Equivalent to icu4c UPROPS_EA_MASK
      */
     private static final int EAST_ASIAN_MASK_ = 0x000e0000;
     /**
      * Integer properties mask and shift values for East Asian cell width.
      * Equivalent to icu4c UPROPS_EA_SHIFT
      */
     private static final int EAST_ASIAN_SHIFT_ = 17;
     /**
      * Integer properties mask and shift values for blocks.
      * Equivalent to icu4c UPROPS_BLOCK_MASK
      */
     private static final int BLOCK_MASK_ = 0x0001ff00;
     /**
      * Integer properties mask and shift values for blocks.
      * Equivalent to icu4c UPROPS_BLOCK_SHIFT
      */
     private static final int BLOCK_SHIFT_ = 8;
     /**
      * Integer properties mask for scripts.
      * Equivalent to icu4c UPROPS_SCRIPT_MASK
      */
     public static final int SCRIPT_MASK_ = 0x000000ff;

     /**
      * Additional properties used in internal trie data
      */
     /*
      * Properties in vector word 1
      * Each bit encodes one binary property.
      * The following constants represent the bit number, use 1<<UPROPS_XYZ.
      * UPROPS_BINARY_1_TOP<=32!
      *
      * Keep this list of property enums in sync with
      * propListNames[] in icu/source/tools/genprops/props2.c!
      *
      * ICU 2.6/uprops format version 3.2 stores full properties instead of "Other_".
      */
     private static final int WHITE_SPACE_PROPERTY_ = 0;
     private static final int DASH_PROPERTY_ = 1;
     private static final int HYPHEN_PROPERTY_ = 2;
     private static final int QUOTATION_MARK_PROPERTY_ = 3;
     private static final int TERMINAL_PUNCTUATION_PROPERTY_ = 4;
     private static final int MATH_PROPERTY_ = 5;
     private static final int HEX_DIGIT_PROPERTY_ = 6;
     private static final int ASCII_HEX_DIGIT_PROPERTY_ = 7;
     private static final int ALPHABETIC_PROPERTY_ = 8;
     private static final int IDEOGRAPHIC_PROPERTY_ = 9;
     private static final int DIACRITIC_PROPERTY_ = 10;
     private static final int EXTENDER_PROPERTY_ = 11;
     private static final int NONCHARACTER_CODE_POINT_PROPERTY_ = 12;
     private static final int GRAPHEME_EXTEND_PROPERTY_ = 13;
     private static final int GRAPHEME_LINK_PROPERTY_ = 14;
     private static final int IDS_BINARY_OPERATOR_PROPERTY_ = 15;
     private static final int IDS_TRINARY_OPERATOR_PROPERTY_ = 16;
     private static final int RADICAL_PROPERTY_ = 17;
     private static final int UNIFIED_IDEOGRAPH_PROPERTY_ = 18;
     private static final int DEFAULT_IGNORABLE_CODE_POINT_PROPERTY_ = 19;
     private static final int DEPRECATED_PROPERTY_ = 20;
     private static final int LOGICAL_ORDER_EXCEPTION_PROPERTY_ = 21;
     private static final int XID_START_PROPERTY_ = 22;
     private static final int XID_CONTINUE_PROPERTY_ = 23;
     private static final int ID_START_PROPERTY_    = 24;
     private static final int ID_CONTINUE_PROPERTY_ = 25;
     private static final int GRAPHEME_BASE_PROPERTY_ = 26;
     private static final int S_TERM_PROPERTY_ = 27;
     private static final int VARIATION_SELECTOR_PROPERTY_ = 28;
     private static final int PATTERN_SYNTAX = 29;                   /* new in ICU 3.4 and Unicode 4.1 */
     private static final int PATTERN_WHITE_SPACE = 30;

     /*
      * Properties in vector word 2
      * Bits
      * 31..26   reserved
      * 25..20   Line Break
      * 19..15   Sentence Break
      * 14..10   Word Break
      *  9.. 5   Grapheme Cluster Break
      *  4.. 0   Decomposition Type
      */
     private static final int LB_MASK          = 0x03f00000;
     private static final int LB_SHIFT         = 20;

     private static final int SB_MASK          = 0x000f8000;
     private static final int SB_SHIFT         = 15;

     private static final int WB_MASK          = 0x00007c00;
     private static final int WB_SHIFT         = 10;

     private static final int GCB_MASK         = 0x000003e0;
     private static final int GCB_SHIFT        = 5;

     /**
      * Integer properties mask for decomposition type.
      * Equivalent to icu4c UPROPS_DT_MASK.
      */
     private static final int DECOMPOSITION_TYPE_MASK_ = 0x0000001f;

     /**
      * First nibble shift
      */
     private static final int FIRST_NIBBLE_SHIFT_ = 0x4;
     /**
      * Second nibble mask
      */
     private static final int LAST_NIBBLE_MASK_ = 0xF;
     /**
      * Age value shift
      */
     private static final int AGE_SHIFT_ = 24;

     // private constructors --------------------------------------------------

     /**
      * Loads uprops.icu and populates this instance: the Unicode version, the
      * main properties trie, and (when present) the additional-properties
      * trie, the properties vectors and the Script_Extensions data.
      * The buffer is consumed strictly sequentially, so the order of the
      * reads and skips below must not be changed.
      * @exception IOException thrown when data reading fails or data corrupted
      */
     private UCharacterProperty() throws IOException
     {
         // jar access
         ByteBuffer bytes=ICUBinary.getRequiredData(DATA_FILE_NAME_);
         // validates the header against DATA_FORMAT / IsAcceptable and yields the data version
         m_unicodeVersion_ = ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, new IsAcceptable());
         // Read or skip the 16 indexes.
         // The offsets are multiplied by 4 below, i.e. they count 32-bit units.
         int propertyOffset = bytes.getInt();
         /* exceptionOffset = */ bytes.getInt();
         /* caseOffset = */ bytes.getInt();
         int additionalOffset = bytes.getInt();
         int additionalVectorsOffset = bytes.getInt();
         m_additionalColumnsCount_ = bytes.getInt();
         int scriptExtensionsOffset = bytes.getInt();
         int reservedOffset7 = bytes.getInt();
         /* reservedOffset8 = */ bytes.getInt();
         /* dataTopOffset = */ bytes.getInt();
         m_maxBlockScriptValue_ = bytes.getInt();
         m_maxJTGValue_ = bytes.getInt();
         // 12 of the 16 index ints were consumed above; skip the remaining 4 (4 bytes each)
         ICUBinary.skipBytes(bytes, (16 - 12) << 2);

         // read the main properties trie
         m_trie_ = Trie2_16.createFromSerialized(bytes);
         // space reserved for the trie: everything between the 16 indexes and propertyOffset
         int expectedTrieLength = (propertyOffset - 16) * 4;
         int trieLength = m_trie_.getSerializedLength();
         if(trieLength > expectedTrieLength) {
             throw new IOException("uprops.icu: not enough bytes for main trie");
         }
         // skip padding after trie bytes
         ICUBinary.skipBytes(bytes, expectedTrieLength - trieLength);

         // skip unused intervening data structures
         ICUBinary.skipBytes(bytes, (additionalOffset - propertyOffset) * 4);

         if(m_additionalColumnsCount_ > 0) {
             // reads the additional property block
             m_additionalTrie_ = Trie2_16.createFromSerialized(bytes);
             expectedTrieLength = (additionalVectorsOffset-additionalOffset)*4;
             trieLength = m_additionalTrie_.getSerializedLength();
             if(trieLength > expectedTrieLength) {
                 throw new IOException("uprops.icu: not enough bytes for additional-properties trie");
             }
             // skip padding after trie bytes
             ICUBinary.skipBytes(bytes, expectedTrieLength - trieLength);

             // additional properties
             // the vectors fill the span up to the Script_Extensions data, one int per unit
             int size = scriptExtensionsOffset - additionalVectorsOffset;
             m_additionalVectors_ = new int[size];
             for (int i = 0; i < size; i ++) {
                 m_additionalVectors_[i] = bytes.getInt();
             }
         }

         // Script_Extensions
         // two 16-bit chars per 32-bit unit; the array stays null when the section is empty
         int numChars = (reservedOffset7 - scriptExtensionsOffset) * 2;
         if(numChars > 0) {
             m_scriptExtensions_ = new char[numChars];
             for(int i = 0; i < numChars; ++i) {
                 m_scriptExtensions_[i] = bytes.getChar();
             }
         }
     }

     private static final class IsAcceptable implements ICUBinary.Authenticate {
         // @Override when we switch to Java 6
         public boolean isDataVersionAcceptable(byte version[]) {
             return version[0] == 7;
         }
     }

     private static final int DATA_FORMAT = 0x5550726F;  // "UPro"

     /**
      * Adds to {@code set} the start code point of each same-value range of
      * the properties vectors trie. Iteration stops at the first range that
      * is flagged as a lead-surrogate range, which is not added.
      *
      * @param set the set receiving the range start code points
      */
     public void upropsvec_addPropertyStarts(UnicodeSet set) {
         if (m_additionalColumnsCount_ <= 0) {
             /* without additional columns the properties vectors trie may not be there at all */
             return;
         }
         Iterator<Trie2.Range> ranges = m_additionalTrie_.iterator();
         while (ranges.hasNext()) {
             Trie2.Range current = ranges.next();
             if (current.leadSurrogate) {
                 break;
             }
             set.add(current.startCodePoint);
         }
     }

     // This static initializer block must be placed after
     // other static member initialization
     // (the constructor reads static constants such as DATA_FILE_NAME_).
     static {
         try {
             INSTANCE = new UCharacterProperty();
         }
         catch (IOException e) {
             // MissingResourceException has no public constructor taking a cause,
             // so attach the IOException explicitly to keep its stack trace.
             MissingResourceException missing =
                 new MissingResourceException(e.getMessage(),DATA_FILE_NAME_,"");
             missing.initCause(e);
             throw missing;
         }
     }


     // Moved from UProperty.java
     /**
      * Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3).
      * Used in UAX #9: Unicode Bidirectional Algorithm
      * (http://www.unicode.org/reports/tr9/)
      * Returns UCharacter.BidiPairedBracketType values.
      * @stable ICU 52
      */
     public static final int BIDI_PAIRED_BRACKET_TYPE = 0x1015;

 }
	/*
	* Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
	*
	* This code is free software; you can redistribute it and/or modify it
	* under the terms of the GNU General Public License version 2 only, as
	* published by the Free Software Foundation. Oracle designates this
	* particular file as subject to the "Classpath" exception as provided
	* by Oracle in the LICENSE file that accompanied this code.
	*
	* This code is distributed in the hope that it will be useful, but WITHOUT
	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
	* version 2 for more details (a copy is included in the LICENSE file that
	* accompanied this code).
	*
	* You should have received a copy of the GNU General Public License version
	* 2 along with this work; if not, write to the Free Software Foundation,
	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
	*
	* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
	* or visit www.oracle.com if you need additional information or have any
	* questions.
	*/
	/*
	*******************************************************************************
	* Copyright (C) 1996-2014, International Business Machines Corporation and
	* others. All Rights Reserved.
	*******************************************************************************
	*/

	package sun.text.normalizer;

	import java.io.IOException;
	import java.nio.ByteBuffer;
	import java.util.Iterator;
	import java.util.MissingResourceException;

	import sun.text.normalizer.UCharacter.HangulSyllableType;
	import sun.text.normalizer.UCharacter.NumericType;

	/**
	* <p>Internal class used for Unicode character property database.</p>
	* <p>This classes store binary data read from uprops.icu.
	* It does not have the capability to parse the data into more high-level
	* information. It only returns bytes of information when required.</p>
	* <p>Due to the form most commonly used for retrieval, array of char is used
	* to store the binary data.</p>
	* <p>UCharacterPropertyDB also contains information on accessing indexes to
	* significant points in the binary data.</p>
	* <p>Responsibility for molding the binary data into more meaning form lies on
	* <a href=UCharacter.html>UCharacter</a>.</p>
	* @author Syn Wee Quek
	* @since release 2.1, february 1st 2002
	*/

	final class UCharacterProperty
	{
	// public data members -----------------------------------------------

	/*
	* public singleton instance
	*/
	public static final UCharacterProperty INSTANCE;

	/**
	* Trie data
	*/
	public Trie2_16 m_trie_;

	/**
	* Unicode version
	*/
	public VersionInfo m_unicodeVersion_;

	/**
	* Character type mask
	*/
	public static final int TYPE_MASK = 0x1F;

	// uprops.h enum UPropertySource --------------------------------------- ***

	/** From uchar.c/uprops.icu main trie */
	public static final int SRC_CHAR=1;
	/** From uchar.c/uprops.icu properties vectors trie */
	public static final int SRC_PROPSVEC=2;
	/** From ubidi_props.c/ubidi.icu */
	public static final int SRC_BIDI=5;
	/** From normalizer2impl.cpp/nfc.nrm */
	public static final int SRC_NFC=8;
	/** From normalizer2impl.cpp/nfkc.nrm */
	public static final int SRC_NFKC=9;

	// public methods ----------------------------------------------------

	/**
	* Gets the main property value for code point ch.
	* @param ch code point whose property value is to be retrieved
	* @return property value of code point
	*/
	public final int getProperty(int ch)
	{
	return m_trie_.get(ch);
	}

	/**
	* Gets the unicode additional properties.
	* Java version of C u_getUnicodeProperties().
	* @param codepoint codepoint whose additional properties is to be
	* retrieved
	* @param column The column index.
	* @return unicode properties
	*/
	public int getAdditional(int codepoint, int column) {
	assert column >= 0;
	if (column >= m_additionalColumnsCount_) {
	return 0;
	}
	return m_additionalVectors_[m_additionalTrie_.get(codepoint) + column];
	}

	/**
	* <p>Get the "age" of the code point.</p>
	* <p>The "age" is the Unicode version when the code point was first
	* designated (as a non-character or for Private Use) or assigned a
	* character.</p>
	* <p>This can be useful to avoid emitting code points to receiving
	* processes that do not accept newer characters.</p>
	* <p>The data is from the UCD file DerivedAge.txt.</p>
	* <p>This API does not check the validity of the codepoint.</p>
	* @param codepoint The code point.
	* @return the Unicode version number
	*/
	public VersionInfo getAge(int codepoint)
	{
	int version = getAdditional(codepoint, 0) >> AGE_SHIFT_;
	return VersionInfo.getInstance(
	(version >> FIRST_NIBBLE_SHIFT_) & LAST_NIBBLE_MASK_,
	version & LAST_NIBBLE_MASK_, 0, 0);
	}

	// int-value and enumerated properties --------------------------------- ***

	public int getType(int c) {
	return getProperty(c)&TYPE_MASK;
	}

	/*
	* Map some of the Grapheme Cluster Break values to Hangul Syllable Types.
	* Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break.
	*/
	private static final int /* UHangulSyllableType */ gcbToHst[]={
	HangulSyllableType.NOT_APPLICABLE, /* U_GCB_OTHER */
	HangulSyllableType.NOT_APPLICABLE, /* U_GCB_CONTROL */
	HangulSyllableType.NOT_APPLICABLE, /* U_GCB_CR */
	HangulSyllableType.NOT_APPLICABLE, /* U_GCB_EXTEND */
	HangulSyllableType.LEADING_JAMO, /* U_GCB_L */
	HangulSyllableType.NOT_APPLICABLE, /* U_GCB_LF */
	HangulSyllableType.LV_SYLLABLE, /* U_GCB_LV */
	HangulSyllableType.LVT_SYLLABLE, /* U_GCB_LVT */
	HangulSyllableType.TRAILING_JAMO, /* U_GCB_T */
	HangulSyllableType.VOWEL_JAMO /* U_GCB_V */
	/*
	* Omit GCB values beyond what we need for hst.
	* The code below checks for the array length.
	*/
	};

	private class IntProperty {
	int column; // SRC_PROPSVEC column, or "source" if mask==0
	int mask;
	int shift;

	IntProperty(int column, int mask, int shift) {
	this.column=column;
	this.mask=mask;
	this.shift=shift;
	}

	IntProperty(int source) {
	this.column=source;
	this.mask=0;
	}

	int getValue(int c) {
	// systematic, directly stored properties
	return (getAdditional(c, column)&mask)>>>shift;
	}
	}

	private class BiDiIntProperty extends IntProperty {
	BiDiIntProperty() {
	super(SRC_BIDI);
	}
	}

	private class CombiningClassIntProperty extends IntProperty {
	CombiningClassIntProperty(int source) {
	super(source);
	}
	}

	private class NormQuickCheckIntProperty extends IntProperty { // UCHAR_NF*_QUICK_CHECK properties
	int which;
	int max;

	NormQuickCheckIntProperty(int source, int which, int max) {
	super(source);
	this.which=which;
	this.max=max;
	}
	}

	private IntProperty intProp = new BiDiIntProperty() { // BIDI_PAIRED_BRACKET_TYPE
	int getValue(int c) {
	return UBiDiProps.INSTANCE.getPairedBracketType(c);
	}
	};

	public int getIntPropertyValue(int c, int which) {
	if (which == BIDI_PAIRED_BRACKET_TYPE) {
	return intProp.getValue(c);
	}
	return 0; // undefined
	}

	/**
	* Forms a supplementary code point from the argument character<br>
	* Note this is for internal use hence no checks for the validity of the
	* surrogate characters are done
	* @param lead lead surrogate character
	* @param trail trailing surrogate character
	* @return code point of the supplementary character
	*/
	public static int getRawSupplementary(char lead, char trail)
	{
	return (lead << LEAD_SURROGATE_SHIFT_) + trail + SURROGATE_OFFSET_;
	}

	/**
	* Gets the type mask
	* @param type character type
	* @return mask
	*/
	public static final int getMask(int type)
	{
	return 1 << type;
	}

	/**
	* Returns the digit values of characters like 'A' - 'Z', normal,
	* half-width and full-width. This method assumes that the other digit
	* characters are checked by the calling method.
	* @param ch character to test
	* @return -1 if ch is not a character of the form 'A' - 'Z', otherwise
	* its corresponding digit will be returned.
	*/
	public static int getEuropeanDigit(int ch) {
	if ((ch > 0x7a && ch < 0xff21)
	\|\| ch < 0x41 \|\| (ch > 0x5a && ch < 0x61)
	\|\| ch > 0xff5a \|\| (ch > 0xff3a && ch < 0xff41)) {
	return -1;
	}
	if (ch <= 0x7a) {
	// ch >= 0x41 or ch < 0x61
	return ch + 10 - ((ch <= 0x5a) ? 0x41 : 0x61);
	}
	// ch >= 0xff21
	if (ch <= 0xff3a) {
	return ch + 10 - 0xff21;
	}
	// ch >= 0xff41 && ch <= 0xff5a
	return ch + 10 - 0xff41;
	}

	public int digit(int c) {
	int value = getNumericTypeValue(getProperty(c)) - NTV_DECIMAL_START_;
	if(value<=9) {
	return value;
	} else {
	return -1;
	}
	}

	// protected variables -----------------------------------------------

	/**
	* Extra property trie
	*/
	Trie2_16 m_additionalTrie_;
	/**
	* Extra property vectors, 1st column for age and second for binary
	* properties.
	*/
	int m_additionalVectors_[];
	/**
	* Number of additional columns
	*/
	int m_additionalColumnsCount_;
	/**
	* Maximum values for block, bits used as in vector word
	* 0
	*/
	int m_maxBlockScriptValue_;
	/**
	* Maximum values for script, bits used as in vector word
	* 0
	*/
	int m_maxJTGValue_;
	/**
	* Script_Extensions data
	*/
	public char[] m_scriptExtensions_;

	// private variables -------------------------------------------------

	/**
	* Default name of the datafile. This is the resource path handed to
	* ICUBinary.getRequiredData by the constructor.
	*/
	private static final String DATA_FILE_NAME_ = "/sun/text/resources/uprops.icu";

	/**
	* Shift value for lead surrogate to form a supplementary character.
	*/
	private static final int LEAD_SURROGATE_SHIFT_ = 10;
	/**
	* Offset to add to combined surrogate pair to avoid masking.
	* By construction, adding this offset to
	* (lead << LEAD_SURROGATE_SHIFT_) + trail maps the pair
	* (UTF16.SURROGATE_MIN_VALUE, UTF16.TRAIL_SURROGATE_MIN_VALUE) to
	* UTF16.SUPPLEMENTARY_MIN_VALUE.
	*/
	private static final int SURROGATE_OFFSET_ =
	UTF16.SUPPLEMENTARY_MIN_VALUE -
	(UTF16.SURROGATE_MIN_VALUE <<
	LEAD_SURROGATE_SHIFT_) -
	UTF16.TRAIL_SURROGATE_MIN_VALUE;


	// property data constants -------------------------------------------------

	/**
	* Numeric types and values in the main properties words.
	* This is the number of low-order bits that are shifted out of a
	* properties word to reach the numeric type/value field.
	*/
	private static final int NUMERIC_TYPE_VALUE_SHIFT_ = 6;
	/**
	* Extracts the numeric type/value field from a main properties word.
	* @param props main properties word
	* @return props shifted right (sign-propagating) by
	* NUMERIC_TYPE_VALUE_SHIFT_ bits
	*/
	private static final int getNumericTypeValue(int props) {
	return props >> NUMERIC_TYPE_VALUE_SHIFT_;
	}

	/* constants for the storage form of numeric types and values */
	/* Each *_START_ constant is the first stored value of its range; */
	/* ntvGetType() classifies a stored value by comparing against them. */
	/** No numeric value. */
	private static final int NTV_NONE_ = 0;
	/** Decimal digits: nv=0..9 (stored as NTV_DECIMAL_START_ + nv, see digit()) */
	private static final int NTV_DECIMAL_START_ = 1;
	/** Other digits: nv=0..9 */
	private static final int NTV_DIGIT_START_ = 11;
	/** Small integers: nv=0..154 */
	private static final int NTV_NUMERIC_START_ = 21;

	/**
	 * Classifies a stored numeric type/value by the range it falls into.
	 * @param ntv numeric type/value in its storage form
	 * @return NumericType.NONE when ntv equals NTV_NONE_, NumericType.DECIMAL
	 * for any other value below NTV_DIGIT_START_, NumericType.DIGIT for
	 * values below NTV_NUMERIC_START_, and NumericType.NUMERIC otherwise
	 */
	private static final int ntvGetType(int ntv) {
	    if (ntv == NTV_NONE_) {
	        return NumericType.NONE;
	    }
	    if (ntv < NTV_DIGIT_START_) {
	        return NumericType.DECIMAL;
	    }
	    if (ntv < NTV_NUMERIC_START_) {
	        return NumericType.DIGIT;
	    }
	    return NumericType.NUMERIC;
	}

	/*
	* Properties in vector word 0
	* Bits
	* 31..24 DerivedAge version major/minor one nibble each
	* 23..22 3..1: Bits 7..0 = Script_Extensions index
	* 3: Script value from Script_Extensions
	* 2: Script=Inherited
	* 1: Script=Common
	* 0: Script=bits 7..0
	* 21..20 reserved
	* 19..17 East Asian Width
	* 16.. 8 UBlockCode
	* 7.. 0 UScriptCode
	*/
	/**
	* Script_Extensions: mask includes Script.
	* Covers bits 23..22 together with bits 7..0 of vector word 0.
	*/
	public static final int SCRIPT_X_MASK = 0x00c000ff;
	//private static final int SCRIPT_X_SHIFT = 22;
	/**
	* Integer properties mask for East Asian cell width
	* (bits 19..17 of vector word 0).
	* Equivalent to icu4c UPROPS_EA_MASK
	*/
	private static final int EAST_ASIAN_MASK_ = 0x000e0000;
	/**
	* Integer properties shift value for East Asian cell width.
	* Equivalent to icu4c UPROPS_EA_SHIFT
	*/
	private static final int EAST_ASIAN_SHIFT_ = 17;
	/**
	* Integer properties mask for blocks (bits 16..8 of vector word 0).
	* Equivalent to icu4c UPROPS_BLOCK_MASK
	*/
	private static final int BLOCK_MASK_ = 0x0001ff00;
	/**
	* Integer properties shift value for blocks.
	* Equivalent to icu4c UPROPS_BLOCK_SHIFT
	*/
	private static final int BLOCK_SHIFT_ = 8;
	/**
	* Integer properties mask for scripts (bits 7..0 of vector word 0;
	* no shift is needed).
	* Equivalent to icu4c UPROPS_SHIFT_MASK
	* NOTE(review): "UPROPS_SHIFT_MASK" looks like a typo for the icu4c
	* script mask name - confirm against the icu4c headers.
	*/
	public static final int SCRIPT_MASK_ = 0x000000ff;

	/**
	* Additional properties used in internal trie data
	*/
	/*
	* Properties in vector word 1
	* Each bit encodes one binary property.
	* The following constants represent the bit number, use 1<<UPROPS_XYZ.
	* UPROPS_BINARY_1_TOP<=32!
	*
	* Keep this list of property enums in sync with
	* propListNames[] in icu/source/tools/genprops/props2.c!
	*
	* ICU 2.6/uprops format version 3.2 stores full properties instead of "Other_".
	*
	* The values below are consecutive bit positions (0..30), not masks.
	*/
	private static final int WHITE_SPACE_PROPERTY_ = 0;
	private static final int DASH_PROPERTY_ = 1;
	private static final int HYPHEN_PROPERTY_ = 2;
	private static final int QUOTATION_MARK_PROPERTY_ = 3;
	private static final int TERMINAL_PUNCTUATION_PROPERTY_ = 4;
	private static final int MATH_PROPERTY_ = 5;
	private static final int HEX_DIGIT_PROPERTY_ = 6;
	private static final int ASCII_HEX_DIGIT_PROPERTY_ = 7;
	private static final int ALPHABETIC_PROPERTY_ = 8;
	private static final int IDEOGRAPHIC_PROPERTY_ = 9;
	private static final int DIACRITIC_PROPERTY_ = 10;
	private static final int EXTENDER_PROPERTY_ = 11;
	private static final int NONCHARACTER_CODE_POINT_PROPERTY_ = 12;
	private static final int GRAPHEME_EXTEND_PROPERTY_ = 13;
	private static final int GRAPHEME_LINK_PROPERTY_ = 14;
	private static final int IDS_BINARY_OPERATOR_PROPERTY_ = 15;
	private static final int IDS_TRINARY_OPERATOR_PROPERTY_ = 16;
	private static final int RADICAL_PROPERTY_ = 17;
	private static final int UNIFIED_IDEOGRAPH_PROPERTY_ = 18;
	private static final int DEFAULT_IGNORABLE_CODE_POINT_PROPERTY_ = 19;
	private static final int DEPRECATED_PROPERTY_ = 20;
	private static final int LOGICAL_ORDER_EXCEPTION_PROPERTY_ = 21;
	private static final int XID_START_PROPERTY_ = 22;
	private static final int XID_CONTINUE_PROPERTY_ = 23;
	private static final int ID_START_PROPERTY_ = 24;
	private static final int ID_CONTINUE_PROPERTY_ = 25;
	private static final int GRAPHEME_BASE_PROPERTY_ = 26;
	private static final int S_TERM_PROPERTY_ = 27;
	private static final int VARIATION_SELECTOR_PROPERTY_ = 28;
	private static final int PATTERN_SYNTAX = 29; /* new in ICU 3.4 and Unicode 4.1 */
	private static final int PATTERN_WHITE_SPACE = 30;

	/*
	* Properties in vector word 2
	* Bits
	* 31..26 reserved
	* 25..20 Line Break
	* 19..15 Sentence Break
	* 14..10 Word Break
	* 9.. 5 Grapheme Cluster Break
	* 4.. 0 Decomposition Type
	*/
	/* Line Break: mask and shift for bits 25..20 of vector word 2 */
	private static final int LB_MASK = 0x03f00000;
	private static final int LB_SHIFT = 20;

	/* Sentence Break: mask and shift for bits 19..15 of vector word 2 */
	private static final int SB_MASK = 0x000f8000;
	private static final int SB_SHIFT = 15;

	/* Word Break: mask and shift for bits 14..10 of vector word 2 */
	private static final int WB_MASK = 0x00007c00;
	private static final int WB_SHIFT = 10;

	/* Grapheme Cluster Break: mask and shift for bits 9..5 of vector word 2 */
	private static final int GCB_MASK = 0x000003e0;
	private static final int GCB_SHIFT = 5;

	/**
	* Integer properties mask for decomposition type
	* (bits 4..0 of vector word 2; no shift is needed).
	* Equivalent to icu4c UPROPS_DT_MASK.
	*/
	private static final int DECOMPOSITION_TYPE_MASK_ = 0x0000001f;

	/**
	* First nibble shift: number of bits to shift right to reach the
	* upper nibble of a byte.
	*/
	private static final int FIRST_NIBBLE_SHIFT_ = 0x4;
	/**
	* Second nibble mask: selects the lower four bits of a byte.
	*/
	private static final int LAST_NIBBLE_MASK_ = 0xF;
	/**
	* Age value shift. Matches the position of the DerivedAge field,
	* which is documented as bits 31..24 of vector word 0.
	*/
	private static final int AGE_SHIFT_ = 24;

	// private constructors --------------------------------------------------

	/**
	* Constructor. Loads the data file named by DATA_FILE_NAME_ and reads
	* its sections one after another from a single ByteBuffer. Every read
	* and skip advances the buffer position, so the statements below rely
	* on their exact order.
	* @exception IOException thrown when data reading fails or data corrupted
	*/
	private UCharacterProperty() throws IOException
	{
	// jar access
	ByteBuffer bytes=ICUBinary.getRequiredData(DATA_FILE_NAME_);
	// The header is read with the expected DATA_FORMAT and an IsAcceptable
	// version check; the value returned is kept as the Unicode version.
	m_unicodeVersion_ = ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, new IsAcceptable());
	// Read or skip the 16 indexes.
	// Values that are not needed are read and discarded only to advance
	// the buffer position.
	int propertyOffset = bytes.getInt();
	/* exceptionOffset = */ bytes.getInt();
	/* caseOffset = */ bytes.getInt();
	int additionalOffset = bytes.getInt();
	int additionalVectorsOffset = bytes.getInt();
	m_additionalColumnsCount_ = bytes.getInt();
	int scriptExtensionsOffset = bytes.getInt();
	int reservedOffset7 = bytes.getInt();
	/* reservedOffset8 = */ bytes.getInt();
	/* dataTopOffset = */ bytes.getInt();
	m_maxBlockScriptValue_ = bytes.getInt();
	m_maxJTGValue_ = bytes.getInt();
	// 12 of the 16 indexes were consumed above; skip the remaining
	// 4 indexes of 4 bytes each.
	ICUBinary.skipBytes(bytes, (16 - 12) << 2);

	// read the main properties trie
	m_trie_ = Trie2_16.createFromSerialized(bytes);
	// Offsets are converted to byte counts by multiplying by 4; the 16
	// index entries that precede the trie are subtracted first.
	int expectedTrieLength = (propertyOffset - 16) * 4;
	int trieLength = m_trie_.getSerializedLength();
	if(trieLength > expectedTrieLength) {
	throw new IOException("uprops.icu: not enough bytes for main trie");
	}
	// skip padding after trie bytes
	ICUBinary.skipBytes(bytes, expectedTrieLength - trieLength);

	// skip unused intervening data structures
	ICUBinary.skipBytes(bytes, (additionalOffset - propertyOffset) * 4);

	// NOTE(review): when m_additionalColumnsCount_ is 0 nothing between
	// additionalOffset and scriptExtensionsOffset is skipped before the
	// Script_Extensions read below - confirm that such data files have no
	// content in that range.
	if(m_additionalColumnsCount_ > 0) {
	// reads the additional property block
	m_additionalTrie_ = Trie2_16.createFromSerialized(bytes);
	expectedTrieLength = (additionalVectorsOffset-additionalOffset)*4;
	trieLength = m_additionalTrie_.getSerializedLength();
	if(trieLength > expectedTrieLength) {
	throw new IOException("uprops.icu: not enough bytes for additional-properties trie");
	}
	// skip padding after trie bytes
	ICUBinary.skipBytes(bytes, expectedTrieLength - trieLength);

	// additional properties
	// one int is read per offset unit between the vectors and the
	// Script_Extensions section
	int size = scriptExtensionsOffset - additionalVectorsOffset;
	m_additionalVectors_ = new int[size];
	for (int i = 0; i < size; i ++) {
	m_additionalVectors_[i] = bytes.getInt();
	}
	}

	// Script_Extensions
	// reservedOffset7 marks the end of this section; each offset unit is
	// read as two chars.
	int numChars = (reservedOffset7 - scriptExtensionsOffset) * 2;
	if(numChars > 0) {
	m_scriptExtensions_ = new char[numChars];
	for(int i = 0; i < numChars; ++i) {
	m_scriptExtensions_[i] = bytes.getChar();
	}
	}
	}

	/**
	 * Version check handed to ICUBinary when the data file header is read.
	 * Only data whose first version byte is 7 is accepted.
	 */
	private static final class IsAcceptable implements ICUBinary.Authenticate {
	    // @Override when we switch to Java 6
	    public boolean isDataVersionAcceptable(byte[] version) {
	        final byte firstByte = version[0];
	        return firstByte == 7;
	    }
	}

	/**
	* Data format identifier passed to ICUBinary.readHeaderAndDataVersion
	* by the constructor; the four bytes are the ASCII codes of "UPro".
	*/
	private static final int DATA_FORMAT = 0x5550726F; // "UPro"

	/**
	 * Adds to the given set the start code point of each same-value range
	 * of the properties vectors trie. Iteration stops at the first range
	 * flagged as a lead-surrogate range, or when the ranges run out.
	 * @param set set that receives the range start code points
	 */
	public void upropsvec_addPropertyStarts(UnicodeSet set) {
	    /* if m_additionalColumnsCount_==0 then the properties vectors trie may not be there at all */
	    if (m_additionalColumnsCount_ <= 0) {
	        return;
	    }
	    Iterator<Trie2.Range> ranges = m_additionalTrie_.iterator();
	    while (ranges.hasNext()) {
	        Trie2.Range current = ranges.next();
	        if (current.leadSurrogate) {
	            break;
	        }
	        set.add(current.startCodePoint);
	    }
	}

	// This static initializer block must be placed after
	// other static member initialization.
	// It creates the singleton INSTANCE; a failure to read the data file is
	// reported as an unchecked MissingResourceException naming DATA_FILE_NAME_.
	static {
	    try {
	        INSTANCE = new UCharacterProperty();
	    }
	    catch (IOException e) {
	        // MissingResourceException has no public constructor that takes
	        // a cause, so attach the original IOException via initCause to
	        // keep its stack trace available for diagnosis.
	        MissingResourceException failure =
	            new MissingResourceException(e.getMessage(), DATA_FILE_NAME_, "");
	        failure.initCause(e);
	        throw failure;
	    }
	}


	// Moved from UProperty.java
	/**
	* Selector constant for the enumerated property Bidi_Paired_Bracket_Type
	* (new in Unicode 6.3).
	* Used in UAX #9: Unicode Bidirectional Algorithm
	* (http://www.unicode.org/reports/tr9/)
	* Returns UCharacter.BidiPairedBracketType values.
	* @stable ICU 52
	*/
	public static final int BIDI_PAIRED_BRACKET_TYPE = 0x1015;

	}