| /* |
| * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. |
| * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| * |
| * This code is free software; you can redistribute it and/or modify it |
| * under the terms of the GNU General Public License version 2 only, as |
| * published by the Free Software Foundation. Oracle designates this |
| * particular file as subject to the "Classpath" exception as provided |
| * by Oracle in the LICENSE file that accompanied this code. |
| * |
| * This code is distributed in the hope that it will be useful, but WITHOUT |
| * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| * version 2 for more details (a copy is included in the LICENSE file that |
| * accompanied this code). |
| * |
| * You should have received a copy of the GNU General Public License version |
| * 2 along with this work; if not, write to the Free Software Foundation, |
| * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| * |
| * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| * or visit www.oracle.com if you need additional information or have any |
| * questions. |
| */ |
| |
| /* |
| ******************************************************************************* |
| * Copyright (C) 2009-2014, International Business Machines Corporation and |
| * others. All Rights Reserved. |
| ******************************************************************************* |
| */ |
| |
| package sun.text.normalizer; |
| |
| import java.io.IOException; |
| import java.nio.ByteBuffer; |
| |
| |
| /** |
| * @author aheninger |
| * |
| * A read-only Trie2, holding 16 bit data values. |
| * |
| * A Trie2 is a highly optimized data structure for mapping from Unicode |
| * code points (values ranging from 0 to 0x10ffff) to a 16 or 32 bit value. |
| * |
| * See class Trie2 for descriptions of the API for accessing the contents of a trie. |
| * |
| * The fundamental data access methods are declared final in this class, with |
| * the intent that applications might gain a little extra performance, when compared |
| * with calling the same methods via the abstract UTrie2 base class. |
| */ |
| public final class Trie2_16 extends Trie2 { |
| |
| /** |
| * Internal constructor, not for general use. |
| */ |
| Trie2_16() { |
| } |
| |
| |
| /** |
| * Create a Trie2 from its serialized form. Inverse of utrie2_serialize(). |
| * The serialized format is identical between ICU4C and ICU4J, so this function |
| * will work with serialized Trie2s from either. |
| * |
| * The serialized Trie2 in the bytes may be in either little or big endian byte order. |
| * This allows using serialized Tries from ICU4C without needing to consider the |
| * byte order of the system that created them. |
| * |
| * @param bytes a byte buffer to the serialized form of a UTrie2. |
| * @return An unserialized Trie2_16, ready for use. |
| * @throws IllegalArgumentException if the buffer does not contain a serialized Trie2. |
| * @throws IOException if a read error occurs in the buffer. |
| * @throws ClassCastException if the bytes contain a serialized Trie2_32 |
| */ |
| public static Trie2_16 createFromSerialized(ByteBuffer bytes) throws IOException { |
| return (Trie2_16) Trie2.createFromSerialized(bytes); |
| } |
| |
| /** |
| * Get the value for a code point as stored in the Trie2. |
| * |
| * @param codePoint the code point |
| * @return the value |
| */ |
| @Override |
| public final int get(int codePoint) { |
| int value; |
| int ix; |
| |
| if (codePoint >= 0) { |
| if (codePoint < 0x0d800 || (codePoint > 0x0dbff && codePoint <= 0x0ffff)) { |
| // Ordinary BMP code point, excluding leading surrogates. |
| // BMP uses a single level lookup. BMP index starts at offset 0 in the Trie2 index. |
| // 16 bit data is stored in the index array itself. |
| ix = index[codePoint >> UTRIE2_SHIFT_2]; |
| ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); |
| value = index[ix]; |
| return value; |
| } |
| if (codePoint <= 0xffff) { |
| // Lead Surrogate Code Point. A Separate index section is stored for |
| // lead surrogate code units and code points. |
| // The main index has the code unit data. |
| // For this function, we need the code point data. |
| // Note: this expression could be refactored for slightly improved efficiency, but |
| // surrogate code points will be so rare in practice that it's not worth it. |
| ix = index[UTRIE2_LSCP_INDEX_2_OFFSET + ((codePoint - 0xd800) >> UTRIE2_SHIFT_2)]; |
| ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); |
| value = index[ix]; |
| return value; |
| } |
| if (codePoint < highStart) { |
| // Supplemental code point, use two-level lookup. |
| ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (codePoint >> UTRIE2_SHIFT_1); |
| ix = index[ix]; |
| ix += (codePoint >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK; |
| ix = index[ix]; |
| ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); |
| value = index[ix]; |
| return value; |
| } |
| if (codePoint <= 0x10ffff) { |
| value = index[highValueIndex]; |
| return value; |
| } |
| } |
| |
| // Fall through. The code point is outside of the legal range of 0..0x10ffff. |
| return errorValue; |
| } |
| |
| |
| /** |
| * Get a Trie2 value for a UTF-16 code unit. |
| * |
| * This function returns the same value as get() if the input |
| * character is outside of the lead surrogate range |
| * |
| * There are two values stored in a Trie2 for inputs in the lead |
| * surrogate range. This function returns the alternate value, |
| * while Trie2.get() returns the main value. |
| * |
| * @param codeUnit a 16 bit code unit or lead surrogate value. |
| * @return the value |
| */ |
| @Override |
| public int getFromU16SingleLead(char codeUnit) { |
| int value; |
| int ix; |
| |
| // Because the input is a 16 bit char, we can skip the tests for it being in |
| // the BMP range. It is. |
| ix = index[codeUnit >> UTRIE2_SHIFT_2]; |
| ix = (ix << UTRIE2_INDEX_SHIFT) + (codeUnit & UTRIE2_DATA_MASK); |
| value = index[ix]; |
| return value; |
| } |
| |
| /** |
| * @return the number of bytes of the serialized trie |
| */ |
| public int getSerializedLength() { |
| return 16+(header.indexLength+dataLength)*2; |
| } |
| } |