| /* |
| * Copyright (c) 2004, 2011, Oracle and/or its affiliates. All rights reserved. |
| * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| * |
| * This code is free software; you can redistribute it and/or modify it |
| * under the terms of the GNU General Public License version 2 only, as |
| * published by the Free Software Foundation. Oracle designates this |
| * particular file as subject to the "Classpath" exception as provided |
| * by Oracle in the LICENSE file that accompanied this code. |
| * |
| * This code is distributed in the hope that it will be useful, but WITHOUT |
| * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| * version 2 for more details (a copy is included in the LICENSE file that |
| * accompanied this code). |
| * |
| * You should have received a copy of the GNU General Public License version |
| * 2 along with this work; if not, write to the Free Software Foundation, |
| * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| * |
| * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| * or visit www.oracle.com if you need additional information or have any |
| * questions. |
| * |
| * THIS FILE WAS MODIFIED BY SUN MICROSYSTEMS, INC. |
| */ |
| |
| package com.sun.xml.internal.fastinfoset; |
| |
| public class DecoderStateTables { |
| private static int RANGE_INDEX_END = 0; |
| private static int RANGE_INDEX_VALUE = 1; |
| |
| public final static int STATE_ILLEGAL = 255; |
| public final static int STATE_UNSUPPORTED = 254; |
| |
| // EII child states |
| public final static int EII_NO_AIIS_INDEX_SMALL = 0; |
| public final static int EII_AIIS_INDEX_SMALL = 1; |
| public final static int EII_INDEX_MEDIUM = 2; |
| public final static int EII_INDEX_LARGE = 3; |
| public final static int EII_NAMESPACES = 4; |
| public final static int EII_LITERAL = 5; |
| public final static int CII_UTF8_SMALL_LENGTH = 6; |
| public final static int CII_UTF8_MEDIUM_LENGTH = 7; |
| public final static int CII_UTF8_LARGE_LENGTH = 8; |
| public final static int CII_UTF16_SMALL_LENGTH = 9; |
| public final static int CII_UTF16_MEDIUM_LENGTH = 10; |
| public final static int CII_UTF16_LARGE_LENGTH = 11; |
| public final static int CII_RA = 12; |
| public final static int CII_EA = 13; |
| public final static int CII_INDEX_SMALL = 14; |
| public final static int CII_INDEX_MEDIUM = 15; |
| public final static int CII_INDEX_LARGE = 16; |
| public final static int CII_INDEX_LARGE_LARGE = 17; |
| public final static int COMMENT_II = 18; |
| public final static int PROCESSING_INSTRUCTION_II = 19; |
| public final static int DOCUMENT_TYPE_DECLARATION_II = 20; |
| public final static int UNEXPANDED_ENTITY_REFERENCE_II = 21; |
| public final static int TERMINATOR_SINGLE = 22; |
| public final static int TERMINATOR_DOUBLE = 23; |
| |
| private static final int[] DII = new int[256]; |
| |
| private static final int[][] DII_RANGES = { |
| // EII |
| |
| // %00000000 to %00011111 EII no attributes small index |
| { 0x1F, EII_NO_AIIS_INDEX_SMALL }, |
| |
| // %00100000 to %00100111 EII medium index |
| { 0x27, EII_INDEX_MEDIUM }, |
| |
| // %00101000 to %00101111 EII large index |
| // %00110000 EII very large index |
| // %00101000 to %00110000 |
| { 0x30, EII_INDEX_LARGE }, |
| |
| // %00110001 to %00110111 ILLEGAL |
| { 0x37, STATE_ILLEGAL }, |
| |
| // %00111000 EII namespaces |
| { 0x38, EII_NAMESPACES }, |
| |
| // %00111001 to %00111011 ILLEGAL |
| { 0x3B, STATE_ILLEGAL }, |
| |
| // %00111100 EII literal (no prefix, no namespace) |
| { 0x3C, EII_LITERAL }, |
| |
| // %00111101 EII literal (no prefix, namespace) |
| { 0x3D, EII_LITERAL }, |
| |
| // %00111110 ILLEGAL |
| { 0x3E, STATE_ILLEGAL }, |
| |
| // %00111111 EII literal (prefix, namespace) |
| { 0x3F, EII_LITERAL }, |
| |
| // %01000000 to %01011111 EII attributes small index |
| { 0x5F, EII_AIIS_INDEX_SMALL }, |
| |
| // %01100000 to %01100111 EII medium index |
| { 0x67, EII_INDEX_MEDIUM }, |
| |
| // %01101000 to %01101111 EII large index |
| // %01110000 EII very large index |
| // %01101000 to %01110000 |
| { 0x70, EII_INDEX_LARGE }, |
| |
| // %01110001 to %01110111 ILLEGAL |
| { 0x77, STATE_ILLEGAL }, |
| |
| // %01111000 EII attributes namespaces |
| { 0x78, EII_NAMESPACES }, |
| |
| // %01111001 to %01111011 ILLEGAL |
| { 0x7B, STATE_ILLEGAL }, |
| |
| // %01111100 EII attributes literal (no prefix, no namespace) |
| { 0x7C, EII_LITERAL }, |
| |
| // %01111101 EII attributes literal (no prefix, namespace) |
| { 0x7D, EII_LITERAL }, |
| |
| // %01111110 ILLEGAL |
| { 0x7E, STATE_ILLEGAL }, |
| |
| // %01111111 EII attributes literal (prefix, namespace) |
| { 0x7F, EII_LITERAL }, |
| |
| // %10000000 to %11000011 |
| { 0xC3, STATE_ILLEGAL }, |
| |
| // %11000100 to %11000111 |
| { 0xC7, DOCUMENT_TYPE_DECLARATION_II }, |
| |
| // %11001000 to %1110000 |
| { 0xE0, STATE_ILLEGAL }, |
| |
| // %11100001 processing instruction |
| { 0xE1, PROCESSING_INSTRUCTION_II }, |
| |
| // %11100010 comment |
| { 0xE2, COMMENT_II}, |
| |
| // %111000011 to %11101111 |
| { 0xEF, STATE_ILLEGAL }, |
| |
| // Terminators |
| |
| // %11110000 single terminator |
| { 0xF0, TERMINATOR_SINGLE }, |
| |
| // %11110000 to %11111110 ILLEGAL |
| { 0xFE, STATE_ILLEGAL }, |
| |
| // %11111111 double terminator |
| { 0xFF, TERMINATOR_DOUBLE } |
| }; |
| |
| private static final int[] EII = new int[256]; |
| |
| private static final int[][] EII_RANGES = { |
| // EII |
| |
| // %00000000 to %00011111 EII no attributes small index |
| { 0x1F, EII_NO_AIIS_INDEX_SMALL }, |
| |
| // %00100000 to %00100111 EII medium index |
| { 0x27, EII_INDEX_MEDIUM }, |
| |
| // %00101000 to %00101111 EII large index |
| // %00110000 EII very large index |
| // %00101000 to %00110000 |
| { 0x30, EII_INDEX_LARGE }, |
| |
| // %00110001 to %00110111 ILLEGAL |
| { 0x37, STATE_ILLEGAL }, |
| |
| // %00111000 EII namespaces |
| { 0x38, EII_NAMESPACES }, |
| |
| // %00111001 to %00111011 ILLEGAL |
| { 0x3B, STATE_ILLEGAL }, |
| |
| // %00111100 EII literal (no prefix, no namespace) |
| { 0x3C, EII_LITERAL }, |
| |
| // %00111101 EII literal (no prefix, namespace) |
| { 0x3D, EII_LITERAL }, |
| |
| // %00111110 ILLEGAL |
| { 0x3E, STATE_ILLEGAL }, |
| |
| // %00111111 EII literal (prefix, namespace) |
| { 0x3F, EII_LITERAL }, |
| |
| // %01000000 to %01011111 EII attributes small index |
| { 0x5F, EII_AIIS_INDEX_SMALL }, |
| |
| // %01100000 to %01100111 EII medium index |
| { 0x67, EII_INDEX_MEDIUM }, |
| |
| // %01101000 to %01101111 EII large index |
| // %01110000 EII very large index |
| // %01101000 to %01110000 |
| { 0x70, EII_INDEX_LARGE }, |
| |
| // %01110001 to %01110111 ILLEGAL |
| { 0x77, STATE_ILLEGAL }, |
| |
| // %01111000 EII attributes namespaces |
| { 0x78, EII_NAMESPACES }, |
| |
| // %01111001 to %01111011 ILLEGAL |
| { 0x7B, STATE_ILLEGAL }, |
| |
| // %01111100 EII attributes literal (no prefix, no namespace) |
| { 0x7C, EII_LITERAL }, |
| |
| // %01111101 EII attributes literal (no prefix, namespace) |
| { 0x7D, EII_LITERAL }, |
| |
| // %01111110 ILLEGAL |
| { 0x7E, STATE_ILLEGAL }, |
| |
| // %01111111 EII attributes literal (prefix, namespace) |
| { 0x7F, EII_LITERAL }, |
| |
| // CII |
| |
| // UTF-8 string |
| |
| // %10000000 to %10000001 CII UTF-8 no add to table small length |
| { 0x81, CII_UTF8_SMALL_LENGTH }, |
| |
| // %10000010 CII UTF-8 no add to table medium length |
| { 0x82, CII_UTF8_MEDIUM_LENGTH }, |
| |
| // %10000011 CII UTF-8 no add to table large length |
| { 0x83, CII_UTF8_LARGE_LENGTH }, |
| |
| // UTF-16 string |
| |
| // %10000100 to %10000101 CII UTF-16 no add to table small length |
| { 0x85, CII_UTF16_SMALL_LENGTH }, |
| |
| // %10000110 CII UTF-16 no add to table medium length |
| { 0x86, CII_UTF16_MEDIUM_LENGTH }, |
| |
| // %10000111 CII UTF-16 no add to table large length |
| { 0x87, CII_UTF16_LARGE_LENGTH }, |
| |
| // Resitricted alphabet |
| |
| // %10001000 to %10001011 CII RA no add to table |
| { 0x8B, CII_RA }, |
| |
| // Encoding algorithm |
| |
| // %10001100 to %10001111 CII EA no add to table |
| { 0x8F, CII_EA }, |
| |
| // UTF-8 string, add to table |
| |
| // %10010000 to %10010001 CII add to table small length |
| { 0x91, CII_UTF8_SMALL_LENGTH }, |
| |
| // %10010010 CII add to table medium length |
| { 0x92, CII_UTF8_MEDIUM_LENGTH }, |
| |
| // %10010011 CII add to table large length |
| { 0x93, CII_UTF8_LARGE_LENGTH }, |
| |
| // UTF-16 string, add to table |
| |
| // %10010100 to %10010101 CII UTF-16 add to table small length |
| { 0x95, CII_UTF16_SMALL_LENGTH }, |
| |
| // %10010110 CII UTF-16 add to table medium length |
| { 0x96, CII_UTF16_MEDIUM_LENGTH }, |
| |
| // %10010111 CII UTF-16 add to table large length |
| { 0x97, CII_UTF16_LARGE_LENGTH }, |
| |
| // Restricted alphabet, add to table |
| |
| // %10011000 to %10011011 CII RA add to table |
| { 0x9B, CII_RA }, |
| |
| // Encoding algorithm, add to table |
| |
| // %10011100 to %10011111 CII EA add to table |
| { 0x9F, CII_EA }, |
| |
| // Index |
| |
| // %10100000 to %10101111 CII small index |
| { 0xAF, CII_INDEX_SMALL }, |
| |
| // %10110000 to %10110011 CII medium index |
| { 0xB3, CII_INDEX_MEDIUM }, |
| |
| // %10110100 to %10110111 CII large index |
| { 0xB7, CII_INDEX_LARGE }, |
| |
| // %10111000 CII very large index |
| { 0xB8, CII_INDEX_LARGE_LARGE }, |
| |
| // %10111001 to %11000111 ILLEGAL |
| { 0xC7, STATE_ILLEGAL }, |
| |
| // %11001000 to %11001011 |
| { 0xCB, UNEXPANDED_ENTITY_REFERENCE_II }, |
| |
| // %11001100 to %11100000 ILLEGAL |
| { 0xE0, STATE_ILLEGAL }, |
| |
| // %11100001 processing instruction |
| { 0xE1, PROCESSING_INSTRUCTION_II }, |
| |
| // %11100010 comment |
| { 0xE2, COMMENT_II}, |
| |
| // %111000011 to %11101111 |
| { 0xEF, STATE_ILLEGAL }, |
| |
| // Terminators |
| |
| // %11110000 single terminator |
| { 0xF0, TERMINATOR_SINGLE }, |
| |
| // %11110000 to %11111110 ILLEGAL |
| { 0xFE, STATE_ILLEGAL }, |
| |
| // %11111111 double terminator |
| { 0xFF, TERMINATOR_DOUBLE } |
| }; |
| |
| |
| // AII states |
| public final static int AII_INDEX_SMALL = 0; |
| public final static int AII_INDEX_MEDIUM = 1; |
| public final static int AII_INDEX_LARGE = 2; |
| public final static int AII_LITERAL = 3; |
| public final static int AII_TERMINATOR_SINGLE = 4; |
| public final static int AII_TERMINATOR_DOUBLE = 5; |
| |
| private static final int[] AII = new int[256]; |
| |
| private static final int[][] AII_RANGES = { |
| // %00000000 to %00111111 AII small index |
| { 0x3F, AII_INDEX_SMALL }, |
| |
| // %01000000 to %01011111 AII medium index |
| { 0x5F, AII_INDEX_MEDIUM }, |
| |
| // %01100000 to %01101111 AII large index |
| { 0x6F, AII_INDEX_LARGE }, |
| |
| // %01110000 to %01110111 ILLEGAL |
| { 0x77, STATE_ILLEGAL }, |
| |
| // %01111000 AII literal (no prefix, no namespace) |
| // %01111001 AII literal (no prefix, namespace) |
| { 0x79, AII_LITERAL }, |
| |
| // %01111010 ILLEGAL |
| { 0x7A, STATE_ILLEGAL }, |
| |
| // %01111011 AII literal (prefix, namespace) |
| { 0x7B, AII_LITERAL }, |
| |
| // %10000000 to %11101111 ILLEGAL |
| { 0xEF, STATE_ILLEGAL }, |
| |
| // Terminators |
| |
| // %11110000 single terminator |
| { 0xF0, AII_TERMINATOR_SINGLE }, |
| |
| // %11110000 to %11111110 ILLEGAL |
| { 0xFE, STATE_ILLEGAL }, |
| |
| // %11111111 double terminator |
| { 0xFF, AII_TERMINATOR_DOUBLE } |
| }; |
| |
| |
| // AII value states |
| public final static int NISTRING_UTF8_SMALL_LENGTH = 0; |
| public final static int NISTRING_UTF8_MEDIUM_LENGTH = 1; |
| public final static int NISTRING_UTF8_LARGE_LENGTH = 2; |
| public final static int NISTRING_UTF16_SMALL_LENGTH = 3; |
| public final static int NISTRING_UTF16_MEDIUM_LENGTH = 4; |
| public final static int NISTRING_UTF16_LARGE_LENGTH = 5; |
| public final static int NISTRING_RA = 6; |
| public final static int NISTRING_EA = 7; |
| public final static int NISTRING_INDEX_SMALL = 8; |
| public final static int NISTRING_INDEX_MEDIUM = 9; |
| public final static int NISTRING_INDEX_LARGE = 10; |
| public final static int NISTRING_EMPTY = 11; |
| |
| private static final int[] NISTRING = new int[256]; |
| |
| private static final int[][] NISTRING_RANGES = { |
| // UTF-8 string |
| |
| // %00000000 to %00000111 UTF-8 no add to table small length |
| { 0x07, NISTRING_UTF8_SMALL_LENGTH }, |
| |
| // %00001000 UTF-8 no add to table medium length |
| { 0x08, NISTRING_UTF8_MEDIUM_LENGTH }, |
| |
| // %00001001 to %00001011 ILLEGAL |
| { 0x0B, STATE_ILLEGAL }, |
| |
| // %00001100 UTF-8 no add to table large length |
| { 0x0C, NISTRING_UTF8_LARGE_LENGTH }, |
| |
| // %00001101 to %00001111 ILLEGAL |
| { 0x0F, STATE_ILLEGAL }, |
| |
| // UTF-16 string |
| |
| // %00010000 to %00010111 UTF-16 no add to table small length |
| { 0x17, NISTRING_UTF16_SMALL_LENGTH }, |
| |
| // %00001000 UTF-16 no add to table medium length |
| { 0x18, NISTRING_UTF16_MEDIUM_LENGTH }, |
| |
| // %00011001 to %00011011 ILLEGAL |
| { 0x1B, STATE_ILLEGAL }, |
| |
| // %00011100 UTF-16 no add to table large length |
| { 0x1C, NISTRING_UTF16_LARGE_LENGTH }, |
| |
| // %00011101 to %00011111 ILLEGAL |
| { 0x1F, STATE_ILLEGAL }, |
| |
| // Restricted alphabet |
| |
| // %00100000 to %00101111 RA no add to table small length |
| { 0x2F, NISTRING_RA }, |
| |
| // Encoding algorithm |
| |
| // %00110000 to %00111111 EA no add to table |
| { 0x3F, NISTRING_EA }, |
| |
| // UTF-8 string, add to table |
| |
| // %01000000 to %01000111 UTF-8 add to table small length |
| { 0x47, NISTRING_UTF8_SMALL_LENGTH }, |
| |
| // %01001000 UTF-8 add to table medium length |
| { 0x48, NISTRING_UTF8_MEDIUM_LENGTH }, |
| |
| // %01001001 to %01001011 ILLEGAL |
| { 0x4B, STATE_ILLEGAL }, |
| |
| // %01001100 UTF-8 add to table large length |
| { 0x4C, NISTRING_UTF8_LARGE_LENGTH }, |
| |
| // %01001101 to %01001111 ILLEGAL |
| { 0x4F, STATE_ILLEGAL }, |
| |
| // UTF-16 string, add to table |
| |
| // %01010000 to %01010111 UTF-16 add to table small length |
| { 0x57, NISTRING_UTF16_SMALL_LENGTH }, |
| |
| // %01001000 UTF-16 add to table medium length |
| { 0x58, NISTRING_UTF16_MEDIUM_LENGTH }, |
| |
| // %01011001 to %01011011 ILLEGAL |
| { 0x5B, STATE_ILLEGAL }, |
| |
| // %01011100 UTF-16 add to table large length |
| { 0x5C, NISTRING_UTF16_LARGE_LENGTH }, |
| |
| // %01011101 to %01011111 ILLEGAL |
| { 0x5F, STATE_ILLEGAL }, |
| |
| // Restricted alphabet, add to table |
| |
| // %01100000 to %01101111 RA no add to table small length |
| { 0x6F, NISTRING_RA }, |
| |
| // Encoding algorithm, add to table |
| |
| // %01110000 to %01111111 EA add to table |
| { 0x7F, NISTRING_EA }, |
| |
| // Index |
| |
| // %10000000 to %10111111 index small |
| { 0xBF, NISTRING_INDEX_SMALL }, |
| |
| // %11000000 to %11011111 index medium |
| { 0xDF, NISTRING_INDEX_MEDIUM }, |
| |
| // %11100000 to %11101111 index large |
| { 0xEF, NISTRING_INDEX_LARGE }, |
| |
| // %11110000 to %11111110 ILLEGAL |
| { 0xFE, STATE_ILLEGAL }, |
| |
| // %11111111 Empty value |
| { 0xFF, NISTRING_EMPTY }, |
| }; |
| |
| |
| /* package */ final static int ISTRING_SMALL_LENGTH = 0; |
| /* package */ final static int ISTRING_MEDIUM_LENGTH = 1; |
| /* package */ final static int ISTRING_LARGE_LENGTH = 2; |
| /* package */ final static int ISTRING_INDEX_SMALL = 3; |
| /* package */ final static int ISTRING_INDEX_MEDIUM = 4; |
| /* package */ final static int ISTRING_INDEX_LARGE = 5; |
| |
| private static final int[] ISTRING = new int[256]; |
| |
| private static final int[][] ISTRING_RANGES = { |
| // %00000000 to %00111111 small length |
| { 0x3F, ISTRING_SMALL_LENGTH }, |
| |
| // %01000000 medium length |
| { 0x40, ISTRING_MEDIUM_LENGTH }, |
| |
| // %01000001 to %01011111 ILLEGAL |
| { 0x5F, STATE_ILLEGAL }, |
| |
| // %01100000 large length |
| { 0x60, ISTRING_LARGE_LENGTH }, |
| |
| // %01100001 to %01111111 ILLEGAL |
| { 0x7F, STATE_ILLEGAL }, |
| |
| // %10000000 to %10111111 index small |
| { 0xBF, ISTRING_INDEX_SMALL }, |
| |
| // %11000000 to %11011111 index medium |
| { 0xDF, ISTRING_INDEX_MEDIUM }, |
| |
| // %11100000 to %11101111 index large |
| { 0xEF, ISTRING_INDEX_LARGE }, |
| |
| // %11110000 to %11111111 ILLEGAL |
| { 0xFF, STATE_ILLEGAL }, |
| }; |
| |
| |
| /* package */ final static int ISTRING_PREFIX_NAMESPACE_LENGTH_3 = 6; |
| /* package */ final static int ISTRING_PREFIX_NAMESPACE_LENGTH_5 = 7; |
| /* package */ final static int ISTRING_PREFIX_NAMESPACE_LENGTH_29 = 8; |
| /* package */ final static int ISTRING_PREFIX_NAMESPACE_LENGTH_36 = 9; |
| /* package */ final static int ISTRING_PREFIX_NAMESPACE_INDEX_ZERO = 10; |
| |
| private static final int[] ISTRING_PREFIX_NAMESPACE = new int[256]; |
| |
| private static final int[][] ISTRING_PREFIX_NAMESPACE_RANGES = { |
| // %00000000 to %00000001 small length |
| { 0x01, ISTRING_SMALL_LENGTH }, |
| |
| // %00000010 small length |
| { 0x02, ISTRING_PREFIX_NAMESPACE_LENGTH_3 }, |
| |
| // %00000011 small length |
| { 0x03, ISTRING_SMALL_LENGTH }, |
| |
| // %00000100 small length |
| { 0x04, ISTRING_PREFIX_NAMESPACE_LENGTH_5 }, |
| |
| // %00011011 small length |
| { 0x1B, ISTRING_SMALL_LENGTH }, |
| |
| // %00011100 small length |
| { 0x1C, ISTRING_PREFIX_NAMESPACE_LENGTH_29 }, |
| |
| // %00100010 small length |
| { 0x22, ISTRING_SMALL_LENGTH }, |
| |
| // %00100011 small length |
| { 0x23, ISTRING_PREFIX_NAMESPACE_LENGTH_36 }, |
| |
| // %00000101 to %00111111 small length |
| { 0x3F, ISTRING_SMALL_LENGTH }, |
| |
| |
| |
| |
| // %01000000 medium length |
| { 0x40, ISTRING_MEDIUM_LENGTH }, |
| |
| // %01000001 to %01011111 ILLEGAL |
| { 0x5F, STATE_ILLEGAL }, |
| |
| // %01100000 large length |
| { 0x60, ISTRING_LARGE_LENGTH }, |
| |
| // %01100001 to %01111111 ILLEGAL |
| { 0x7F, STATE_ILLEGAL }, |
| |
| // %10000000 index small, 0 |
| { 0x80, ISTRING_PREFIX_NAMESPACE_INDEX_ZERO }, |
| |
| // %10000000 to %10111111 index small |
| { 0xBF, ISTRING_INDEX_SMALL }, |
| |
| // %11000000 to %11011111 index medium |
| { 0xDF, ISTRING_INDEX_MEDIUM }, |
| |
| // %11100000 to %11101111 index large |
| { 0xEF, ISTRING_INDEX_LARGE }, |
| |
| // %11110000 to %11111111 ILLEGAL |
| { 0xFF, STATE_ILLEGAL }, |
| }; |
| |
| // UTF-8 states |
| /* package */ final static int UTF8_NCNAME_NCNAME = 0; |
| /* package */ final static int UTF8_NCNAME_NCNAME_CHAR = 1; |
| /* package */ final static int UTF8_TWO_BYTES = 2; |
| /* package */ final static int UTF8_THREE_BYTES = 3; |
| /* package */ final static int UTF8_FOUR_BYTES = 4; |
| |
| private static final int[] UTF8_NCNAME = new int[256]; |
| |
| private static final int[][] UTF8_NCNAME_RANGES = { |
| |
| // Basic Latin |
| |
| // %00000000 to %00101100 |
| { 0x2C, STATE_ILLEGAL }, |
| |
| // '-' '.' |
| // %%00101101 to %00101110 [#x002D-#x002E] |
| { 0x2E, UTF8_NCNAME_NCNAME_CHAR }, |
| |
| // %00101111 |
| { 0x2F, STATE_ILLEGAL }, |
| |
| // [0-9] |
| // %0011000 to %00111001 [#x0030-#x0039] |
| { 0x39, UTF8_NCNAME_NCNAME_CHAR }, |
| |
| // %01000000 |
| { 0x40, STATE_ILLEGAL }, |
| |
| // [A-Z] |
| // %01000001 to %01011010 [#x0041-#x005A] |
| { 0x5A, UTF8_NCNAME_NCNAME }, |
| |
| // %01011110 |
| { 0x5E, STATE_ILLEGAL }, |
| |
| // '_' |
| // %01011111 [#x005F] |
| { 0x5F, UTF8_NCNAME_NCNAME }, |
| |
| // %01100000 |
| { 0x60, STATE_ILLEGAL }, |
| |
| // [a-z] |
| // %01100001 to %01111010 [#x0061-#x007A] |
| { 0x7A, UTF8_NCNAME_NCNAME }, |
| |
| // %01111011 to %01111111 |
| { 0x7F, STATE_ILLEGAL }, |
| |
| |
| // Two bytes |
| |
| // %10000000 to %11000001 |
| { 0xC1, STATE_ILLEGAL }, |
| |
| // %11000010 to %11011111 |
| { 0xDF, UTF8_TWO_BYTES }, |
| |
| |
| // Three bytes |
| |
| // %11100000 to %11101111 |
| { 0xEF, UTF8_THREE_BYTES }, |
| |
| |
| // Four bytes |
| |
| // %11110000 to %11110111 |
| { 0xF7, UTF8_FOUR_BYTES }, |
| |
| |
| // %11111000 to %11111111 |
| { 0xFF, STATE_ILLEGAL } |
| }; |
| |
| /* package */ final static int UTF8_ONE_BYTE = 1; |
| |
| private static final int[] UTF8 = new int[256]; |
| |
| private static final int[][] UTF8_RANGES = { |
| |
| // Basic Latin |
| |
| // %00000000 to %00001000 |
| { 0x08, STATE_ILLEGAL }, |
| |
| // CHARACTER TABULATION, LINE FEED |
| // %%00001001 to %00001010 [#x0009-#x000A] |
| { 0x0A, UTF8_ONE_BYTE }, |
| |
| // %00001011 to %00001100 |
| { 0x0C, STATE_ILLEGAL }, |
| |
| // CARRIAGE RETURN |
| // %00001101 [#x000D] |
| { 0x0D, UTF8_ONE_BYTE }, |
| |
| // %00001110 to %00011111 |
| { 0x1F, STATE_ILLEGAL }, |
| |
| // %0010000 to %01111111 |
| { 0x7F, UTF8_ONE_BYTE }, |
| |
| |
| // Two bytes |
| |
| // %10000000 to %11000001 |
| { 0xC1, STATE_ILLEGAL }, |
| |
| // %11000010 to %11011111 |
| { 0xDF, UTF8_TWO_BYTES }, |
| |
| |
| // Three bytes |
| |
| // %11100000 to %11101111 |
| { 0xEF, UTF8_THREE_BYTES }, |
| |
| |
| // Four bytes |
| |
| // %11110000 to %11110111 |
| { 0xF7, UTF8_FOUR_BYTES }, |
| |
| |
| // %11111000 to %11111111 |
| { 0xFF, STATE_ILLEGAL } |
| }; |
| |
| private static void constructTable(int[] table, int[][] ranges) { |
| int start = 0x00; |
| for (int range = 0; range < ranges.length; range++) { |
| int end = ranges[range][RANGE_INDEX_END]; |
| int value = ranges[range][RANGE_INDEX_VALUE]; |
| for (int i = start; i<= end; i++) { |
| table[i] = value; |
| } |
| start = end + 1; |
| } |
| } |
| |
| public static final int DII(final int index) { |
| return DII[index]; |
| } |
| |
| public static final int EII(final int index) { |
| return EII[index]; |
| } |
| |
| public static final int AII(final int index) { |
| return AII[index]; |
| } |
| |
| public static final int NISTRING(final int index) { |
| return NISTRING[index]; |
| } |
| |
| public static final int ISTRING(final int index) { |
| return ISTRING[index]; |
| } |
| |
| public static final int ISTRING_PREFIX_NAMESPACE(final int index) { |
| return ISTRING_PREFIX_NAMESPACE[index]; |
| } |
| |
| public static final int UTF8(final int index) { |
| return UTF8[index]; |
| } |
| |
| public static final int UTF8_NCNAME(final int index) { |
| return UTF8_NCNAME[index]; |
| } |
| |
| static { |
| // DII |
| constructTable(DII, DII_RANGES); |
| |
| // EII |
| constructTable(EII, EII_RANGES); |
| |
| // AII |
| constructTable(AII, AII_RANGES); |
| |
| // AII Value |
| constructTable(NISTRING, NISTRING_RANGES); |
| |
| // Identifying string |
| constructTable(ISTRING, ISTRING_RANGES); |
| |
| // Identifying string |
| constructTable(ISTRING_PREFIX_NAMESPACE, ISTRING_PREFIX_NAMESPACE_RANGES); |
| |
| // UTF-8 NCNAME states |
| constructTable(UTF8_NCNAME, UTF8_NCNAME_RANGES); |
| |
| // UTF-8 states |
| constructTable(UTF8, UTF8_RANGES); |
| } |
| |
| private DecoderStateTables() { |
| } |
| } |