| /* |
| * ProGuard -- shrinking, optimization, obfuscation, and preverification |
| * of Java bytecode. |
| * |
| * Copyright (c) 2002-2009 Eric Lafortune (eric@graphics.cornell.edu) |
| * |
| * This program is free software; you can redistribute it and/or modify it |
| * under the terms of the GNU General Public License as published by the Free |
| * Software Foundation; either version 2 of the License, or (at your option) |
| * any later version. |
| * |
| * This program is distributed in the hope that it will be useful, but WITHOUT |
| * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
| * more details. |
| * |
| * You should have received a copy of the GNU General Public License along |
| * with this program; if not, write to the Free Software Foundation, Inc., |
| * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
| */ |
| package proguard.classfile.constant; |
| |
| import proguard.classfile.*; |
| import proguard.classfile.constant.visitor.ConstantVisitor; |
| |
| import java.io.UnsupportedEncodingException; |
| |
| /** |
| * This Constant represents a UTF-8 constant in the constant pool. |
| * |
| * @author Eric Lafortune |
| */ |
| public class Utf8Constant extends Constant |
| { |
| private static final char TWO_BYTE_LIMIT = 0x80; |
| private static final int TWO_BYTE_CONSTANT1 = 0xc0; |
| private static final int TWO_BYTE_CONSTANT2 = 0x80; |
| private static final int TWO_BYTE_SHIFT1 = 6; |
| private static final int TWO_BYTE_MASK1 = 0x1f; |
| private static final int TWO_BYTE_MASK2 = 0x3f; |
| |
| private static final char THREE_BYTE_LIMIT = 0x800; |
| private static final int THREE_BYTE_CONSTANT1 = 0xe0; |
| private static final int THREE_BYTE_CONSTANT2 = 0x80; |
| private static final int THREE_BYTE_CONSTANT3 = 0x80; |
| private static final int THREE_BYTE_SHIFT1 = 12; |
| private static final int THREE_BYTE_SHIFT2 = 6; |
| private static final int THREE_BYTE_MASK1 = 0x0f; |
| private static final int THREE_BYTE_MASK2 = 0x3f; |
| private static final int THREE_BYTE_MASK3 = 0x3f; |
| |
| |
| // There are a lot of Utf8Constant objects, so we're optimising their storage. |
| // Initially, we're storing the UTF-8 bytes in a byte array. |
| // When the corresponding String is requested, we ditch the array and just |
| // store the String. |
| |
| //private int u2length; |
| private byte[] bytes; |
| |
| private String string; |
| |
| |
| /** |
| * Creates an uninitialized Utf8Constant. |
| * |
| */ |
| public Utf8Constant() |
| { |
| } |
| |
| |
| /** |
| * Creates a Utf8Constant containing the given string. |
| */ |
| public Utf8Constant(String string) |
| { |
| this.bytes = null; |
| this.string = string; |
| } |
| |
| |
| /** |
| * Initializes the UTF-8 data with an array of bytes. |
| */ |
| public void setBytes(byte[] bytes) |
| { |
| this.bytes = bytes; |
| this.string = null; |
| } |
| |
| |
| /** |
| * Returns the UTF-8 data as an array of bytes. |
| */ |
| public byte[] getBytes() |
| { |
| try |
| { |
| switchToByteArrayRepresentation(); |
| } |
| catch (UnsupportedEncodingException ex) |
| { |
| throw new RuntimeException(ex.getMessage()); |
| } |
| |
| return bytes; |
| } |
| |
| |
| /** |
| * Initializes the UTF-8 data with a String. |
| */ |
| public void setString(String utf8String) |
| { |
| this.bytes = null; |
| this.string = utf8String; |
| } |
| |
| |
| /** |
| * Returns the UTF-8 data as a String. |
| */ |
| public String getString() |
| { |
| try |
| { |
| switchToStringRepresentation(); |
| } |
| catch (UnsupportedEncodingException ex) |
| { |
| throw new RuntimeException(ex.getMessage()); |
| } |
| |
| return string; |
| } |
| |
| |
| // Implementations for Constant. |
| |
| public int getTag() |
| { |
| return ClassConstants.CONSTANT_Utf8; |
| } |
| |
| public void accept(Clazz clazz, ConstantVisitor constantVisitor) |
| { |
| constantVisitor.visitUtf8Constant(clazz, this); |
| } |
| |
| |
| // Small utility methods. |
| |
| /** |
| * Switches to a byte array representation of the UTF-8 data. |
| */ |
| private void switchToByteArrayRepresentation() throws UnsupportedEncodingException |
| { |
| if (bytes == null) |
| { |
| bytes = getByteArrayRepresentation(string); |
| string = null; |
| } |
| } |
| |
| |
| /** |
| * Switches to a String representation of the UTF-8 data. |
| */ |
| private void switchToStringRepresentation() throws UnsupportedEncodingException |
| { |
| if (string == null) |
| { |
| string = getStringRepresentation(bytes); |
| bytes = null; |
| } |
| } |
| |
| |
| /** |
| * Returns the modified UTF-8 byte array representation of the given string. |
| */ |
| private byte[] getByteArrayRepresentation(String string) throws UnsupportedEncodingException |
| { |
| // We're computing the byte array ourselves, because the implementation |
| // of String.getBytes("UTF-8") has a bug, at least up to JRE 1.4.2. |
| // Also note the special treatment of the 0 character. |
| |
| // Compute the byte array length. |
| int byteLength = 0; |
| int stringLength = string.length(); |
| for (int stringIndex = 0; stringIndex < stringLength; stringIndex++) |
| { |
| char c = string.charAt(stringIndex); |
| |
| // The character is represented by one, two, or three bytes. |
| byteLength += c == 0 ? 2 : |
| c < TWO_BYTE_LIMIT ? 1 : |
| c < THREE_BYTE_LIMIT ? 2 : |
| 3; |
| } |
| |
| // Allocate the byte array with the computed length. |
| byte[] bytes = new byte[byteLength]; |
| |
| // Fill out the array. |
| int byteIndex = 0; |
| for (int stringIndex = 0; stringIndex < stringLength; stringIndex++) |
| { |
| char c = string.charAt(stringIndex); |
| if (c == 0) |
| { |
| // The 0 character gets a two-byte representation in classes. |
| bytes[byteIndex++] = (byte)TWO_BYTE_CONSTANT1; |
| bytes[byteIndex++] = (byte)TWO_BYTE_CONSTANT2; |
| } |
| else if (c < TWO_BYTE_LIMIT) |
| { |
| // The character is represented by a single byte. |
| bytes[byteIndex++] = (byte)c; |
| } |
| else if (c < THREE_BYTE_LIMIT) |
| { |
| // The character is represented by two bytes. |
| bytes[byteIndex++] = (byte)(TWO_BYTE_CONSTANT1 | ((c >>> TWO_BYTE_SHIFT1) & TWO_BYTE_MASK1)); |
| bytes[byteIndex++] = (byte)(TWO_BYTE_CONSTANT2 | ( c & TWO_BYTE_MASK2)); |
| } |
| else |
| { |
| // The character is represented by three bytes. |
| bytes[byteIndex++] = (byte)(THREE_BYTE_CONSTANT1 | ((c >>> THREE_BYTE_SHIFT1) & THREE_BYTE_MASK1)); |
| bytes[byteIndex++] = (byte)(THREE_BYTE_CONSTANT2 | ((c >>> THREE_BYTE_SHIFT2) & THREE_BYTE_MASK2)); |
| bytes[byteIndex++] = (byte)(THREE_BYTE_CONSTANT3 | ( c & THREE_BYTE_MASK3)); |
| } |
| } |
| |
| return bytes; |
| } |
| |
| |
| /** |
| * Returns the String representation of the given modified UTF-8 byte array. |
| */ |
| private String getStringRepresentation(byte[] bytes) throws UnsupportedEncodingException |
| { |
| // We're computing the string ourselves, because the implementation |
| // of "new String(bytes)" doesn't honor the special treatment of |
| // the 0 character in JRE 1.6_u11. |
| |
| // Allocate the byte array with the computed length. |
| char[] chars = new char[bytes.length]; |
| |
| // Fill out the array. |
| int charIndex = 0; |
| int byteIndex = 0; |
| while (byteIndex < bytes.length) |
| { |
| |
| int b = bytes[byteIndex++] & 0xff; |
| |
| // Depending on the flag bits in the first byte, the character |
| // is represented by a single byte, by two bytes, or by three |
| // bytes. We're not checking the redundant flag bits in the |
| // second byte and the third byte. |
| try |
| { |
| chars[charIndex++] = |
| (char)(b < TWO_BYTE_CONSTANT1 ? b : |
| |
| b < THREE_BYTE_CONSTANT1 ? ((b & TWO_BYTE_MASK1) << TWO_BYTE_SHIFT1) | |
| ((bytes[byteIndex++] & TWO_BYTE_MASK2) ) : |
| |
| ((b & THREE_BYTE_MASK1) << THREE_BYTE_SHIFT1) | |
| ((bytes[byteIndex++] & THREE_BYTE_MASK2) << THREE_BYTE_SHIFT2) | |
| ((bytes[byteIndex++] & THREE_BYTE_MASK3) )); |
| } |
| catch (ArrayIndexOutOfBoundsException e) |
| { |
| throw new UnsupportedEncodingException("Missing UTF-8 bytes after initial byte [0x"+Integer.toHexString(b)+"] in string ["+new String(chars, 0, charIndex)+"]"); |
| } |
| } |
| |
| return new String(chars, 0, charIndex); |
| } |
| } |