dx/src/com/android/dx/rop/cst/CstString.java - platform/dalvik2 - Git at Google

 /*
  * Copyright (C) 2007 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package com.android.dx.rop.cst;

 import com.android.dx.rop.type.Type;
 import com.android.dx.util.ByteArray;
 import com.android.dx.util.Hex;

 /**
  * Constants of type {@code CONSTANT_Utf8_info} or {@code CONSTANT_String_info}.
  */
 public final class CstString extends TypedConstant {
     /**
      * {@code non-null;} instance representing {@code ""}, that is, the
      * empty string
      */
     public static final CstString EMPTY_STRING = new CstString("");

     /** {@code non-null;} the UTF-8 value as a string */
     private final String string;

     /** {@code non-null;} the UTF-8 value as bytes */
     private final ByteArray bytes;

     /**
      * Converts a string into its MUTF-8 form. MUTF-8 differs from normal UTF-8
      * in the handling of character '\0' and surrogate pairs.
      *
      * @param string {@code non-null;} the string to convert
      * @return {@code non-null;} the UTF-8 bytes for it
      */
     public static byte[] stringToUtf8Bytes(String string) {
         int len = string.length();
         byte[] bytes = new byte[len * 3]; // Avoid having to reallocate.
         int outAt = 0;

         for (int i = 0; i < len; i++) {
             char c = string.charAt(i);
             if ((c != 0) && (c < 0x80)) {
                 bytes[outAt] = (byte) c;
                 outAt++;
             } else if (c < 0x800) {
                 bytes[outAt] = (byte) (((c >> 6) & 0x1f) | 0xc0);
                 bytes[outAt + 1] = (byte) ((c & 0x3f) | 0x80);
                 outAt += 2;
             } else {
                 bytes[outAt] = (byte) (((c >> 12) & 0x0f) | 0xe0);
                 bytes[outAt + 1] = (byte) (((c >> 6) & 0x3f) | 0x80);
                 bytes[outAt + 2] = (byte) ((c & 0x3f) | 0x80);
                 outAt += 3;
             }
         }

         byte[] result = new byte[outAt];
         System.arraycopy(bytes, 0, result, 0, outAt);
         return result;
     }

     /**
      * Converts an array of UTF-8 bytes into a string.
      *
      * @param bytes {@code non-null;} the bytes to convert
      * @return {@code non-null;} the converted string
      */
     public static String utf8BytesToString(ByteArray bytes) {
         int length = bytes.size();
         char[] chars = new char[length]; // This is sized to avoid a realloc.
         int outAt = 0;

         for (int at = 0; length > 0; /*at*/) {
             int v0 = bytes.getUnsignedByte(at);
             char out;
             switch (v0 >> 4) {
                 case 0x00: case 0x01: case 0x02: case 0x03:
                 case 0x04: case 0x05: case 0x06: case 0x07: {
                     // 0XXXXXXX -- single-byte encoding
                     length--;
                     if (v0 == 0) {
                         // A single zero byte is illegal.
                         return throwBadUtf8(v0, at);
                     }
                     out = (char) v0;
                     at++;
                     break;
                 }
                 case 0x0c: case 0x0d: {
                     // 110XXXXX -- two-byte encoding
                     length -= 2;
                     if (length < 0) {
                         return throwBadUtf8(v0, at);
                     }
                     int v1 = bytes.getUnsignedByte(at + 1);
                     if ((v1 & 0xc0) != 0x80) {
                         return throwBadUtf8(v1, at + 1);
                     }
                     int value = ((v0 & 0x1f) << 6) | (v1 & 0x3f);
                     if ((value != 0) && (value < 0x80)) {
                         /*
                          * This should have been represented with
                          * one-byte encoding.
                          */
                         return throwBadUtf8(v1, at + 1);
                     }
                     out = (char) value;
                     at += 2;
                     break;
                 }
                 case 0x0e: {
                     // 1110XXXX -- three-byte encoding
                     length -= 3;
                     if (length < 0) {
                         return throwBadUtf8(v0, at);
                     }
                     int v1 = bytes.getUnsignedByte(at + 1);
                     if ((v1 & 0xc0) != 0x80) {
                         return throwBadUtf8(v1, at + 1);
                     }
                     int v2 = bytes.getUnsignedByte(at + 2);
                     if ((v1 & 0xc0) != 0x80) {
                         return throwBadUtf8(v2, at + 2);
                     }
                     int value = ((v0 & 0x0f) << 12) | ((v1 & 0x3f) << 6) |
                         (v2 & 0x3f);
                     if (value < 0x800) {
                         /*
                          * This should have been represented with one- or
                          * two-byte encoding.
                          */
                         return throwBadUtf8(v2, at + 2);
                     }
                     out = (char) value;
                     at += 3;
                     break;
                 }
                 default: {
                     // 10XXXXXX, 1111XXXX -- illegal
                     return throwBadUtf8(v0, at);
                 }
             }
             chars[outAt] = out;
             outAt++;
         }

         return new String(chars, 0, outAt);
     }

     /**
      * Helper for {@link #utf8BytesToString}, which throws the right
      * exception for a bogus utf-8 byte.
      *
      * @param value the byte value
      * @param offset the file offset
      * @return never
      * @throws IllegalArgumentException always thrown
      */
     private static String throwBadUtf8(int value, int offset) {
         throw new IllegalArgumentException("bad utf-8 byte " + Hex.u1(value) +
                                            " at offset " + Hex.u4(offset));
     }

     /**
      * Constructs an instance from a {@code String}.
      *
      * @param string {@code non-null;} the UTF-8 value as a string
      */
     public CstString(String string) {
         if (string == null) {
             throw new NullPointerException("string == null");
         }

         this.string = string.intern();
         this.bytes = new ByteArray(stringToUtf8Bytes(string));
     }

     /**
      * Constructs an instance from some UTF-8 bytes.
      *
      * @param bytes {@code non-null;} array of the UTF-8 bytes
      */
     public CstString(ByteArray bytes) {
         if (bytes == null) {
             throw new NullPointerException("bytes == null");
         }

         this.bytes = bytes;
         this.string = utf8BytesToString(bytes).intern();
     }

     /** {@inheritDoc} */
     @Override
     public boolean equals(Object other) {
         if (!(other instanceof CstString)) {
             return false;
         }

         return string.equals(((CstString) other).string);
     }

     /** {@inheritDoc} */
     @Override
     public int hashCode() {
         return string.hashCode();
     }

     /** {@inheritDoc} */
     @Override
     protected int compareTo0(Constant other) {
         return string.compareTo(((CstString) other).string);
     }

     /** {@inheritDoc} */
     @Override
     public String toString() {
         return "string{\"" + toHuman() + "\"}";
     }

     /** {@inheritDoc} */
     @Override
     public String typeName() {
         return "utf8";
     }

     /** {@inheritDoc} */
     @Override
     public boolean isCategory2() {
         return false;
     }

     /** {@inheritDoc} */
     public String toHuman() {
         int len = string.length();
         StringBuilder sb = new StringBuilder(len * 3 / 2);

         for (int i = 0; i < len; i++) {
             char c = string.charAt(i);
             if ((c >= ' ') && (c < 0x7f)) {
                 if ((c == '\'') || (c == '\"') || (c == '\\')) {
                     sb.append('\\');
                 }
                 sb.append(c);
             } else if (c <= 0x7f) {
                 switch (c) {
                     case '\n': sb.append("\\n"); break;
                     case '\r': sb.append("\\r"); break;
                     case '\t': sb.append("\\t"); break;
                     default: {
                         /*
                          * Represent the character as an octal escape.
                          * If the next character is a valid octal
                          * digit, disambiguate by using the
                          * three-digit form.
                          */
                         char nextChar =
                             (i < (len - 1)) ? string.charAt(i + 1) : 0;
                         boolean displayZero =
                             (nextChar >= '0') && (nextChar <= '7');
                         sb.append('\\');
                         for (int shift = 6; shift >= 0; shift -= 3) {
                             char outChar = (char) (((c >> shift) & 7) + '0');
                             if ((outChar != '0') || displayZero) {
                                 sb.append(outChar);
                                 displayZero = true;
                             }
                         }
                         if (! displayZero) {
                             // Ironic edge case: The original value was 0.
                             sb.append('0');
                         }
                         break;
                     }
                 }
             } else {
                 sb.append("\\u");
                 sb.append(Character.forDigit(c >> 12, 16));
                 sb.append(Character.forDigit((c >> 8) & 0x0f, 16));
                 sb.append(Character.forDigit((c >> 4) & 0x0f, 16));
                 sb.append(Character.forDigit(c & 0x0f, 16));
             }
         }

         return sb.toString();
     }

     /**
      * Gets the value as a human-oriented string, surrounded by double
      * quotes.
      *
      * @return {@code non-null;} the quoted string
      */
     public String toQuoted() {
         return '\"' + toHuman() + '\"';
     }

     /**
      * Gets the value as a human-oriented string, surrounded by double
      * quotes, but ellipsizes the result if it is longer than the given
      * maximum length
      *
      * @param maxLength {@code >= 5;} the maximum length of the string to return
      * @return {@code non-null;} the quoted string
      */
     public String toQuoted(int maxLength) {
         String string = toHuman();
         int length = string.length();
         String ellipses;

         if (length <= (maxLength - 2)) {
             ellipses = "";
         } else {
             string = string.substring(0, maxLength - 5);
             ellipses = "...";
         }

         return '\"' + string + ellipses + '\"';
     }

     /**
      * Gets the UTF-8 value as a string.
      * The returned string is always already interned.
      *
      * @return {@code non-null;} the UTF-8 value as a string
      */
     public String getString() {
         return string;
     }

     /**
      * Gets the UTF-8 value as UTF-8 encoded bytes.
      *
      * @return {@code non-null;} an array of the UTF-8 bytes
      */
     public ByteArray getBytes() {
         return bytes;
     }

     /**
      * Gets the size of this instance as UTF-8 code points. That is,
      * get the number of bytes in the UTF-8 encoding of this instance.
      *
      * @return {@code >= 0;} the UTF-8 size
      */
     public int getUtf8Size() {
         return bytes.size();
     }

     /**
      * Gets the size of this instance as UTF-16 code points. That is,
      * get the number of 16-bit chars in the UTF-16 encoding of this
      * instance. This is the same as the {@code length} of the
      * Java {@code String} representation of this instance.
      *
      * @return {@code >= 0;} the UTF-16 size
      */
     public int getUtf16Size() {
         return string.length();
     }

     public Type getType() {
         return Type.STRING;
     }
 }
	/*
	* Copyright (C) 2007 The Android Open Source Project
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package com.android.dx.rop.cst;

	import com.android.dx.rop.type.Type;
	import com.android.dx.util.ByteArray;
	import com.android.dx.util.Hex;

	/**
	* Constants of type {@code CONSTANT_Utf8_info} or {@code CONSTANT_String_info}.
	*/
	public final class CstString extends TypedConstant {
	/**
	* {@code non-null;} instance representing {@code ""}, that is, the
	* empty string
	*/
	public static final CstString EMPTY_STRING = new CstString("");

	/** {@code non-null;} the UTF-8 value as a string */
	private final String string;

	/** {@code non-null;} the UTF-8 value as bytes */
	private final ByteArray bytes;

	/**
	* Converts a string into its MUTF-8 form. MUTF-8 differs from normal UTF-8
	* in the handling of character '\0' and surrogate pairs.
	*
	* @param string {@code non-null;} the string to convert
	* @return {@code non-null;} the UTF-8 bytes for it
	*/
	public static byte[] stringToUtf8Bytes(String string) {
	int len = string.length();
	byte[] bytes = new byte[len * 3]; // Avoid having to reallocate.
	int outAt = 0;

	for (int i = 0; i < len; i++) {
	char c = string.charAt(i);
	if ((c != 0) && (c < 0x80)) {
	bytes[outAt] = (byte) c;
	outAt++;
	} else if (c < 0x800) {
	bytes[outAt] = (byte) (((c >> 6) & 0x1f) \| 0xc0);
	bytes[outAt + 1] = (byte) ((c & 0x3f) \| 0x80);
	outAt += 2;
	} else {
	bytes[outAt] = (byte) (((c >> 12) & 0x0f) \| 0xe0);
	bytes[outAt + 1] = (byte) (((c >> 6) & 0x3f) \| 0x80);
	bytes[outAt + 2] = (byte) ((c & 0x3f) \| 0x80);
	outAt += 3;
	}
	}

	byte[] result = new byte[outAt];
	System.arraycopy(bytes, 0, result, 0, outAt);
	return result;
	}

	/**
	* Converts an array of UTF-8 bytes into a string.
	*
	* @param bytes {@code non-null;} the bytes to convert
	* @return {@code non-null;} the converted string
	*/
	public static String utf8BytesToString(ByteArray bytes) {
	int length = bytes.size();
	char[] chars = new char[length]; // This is sized to avoid a realloc.
	int outAt = 0;

	for (int at = 0; length > 0; /at/) {
	int v0 = bytes.getUnsignedByte(at);
	char out;
	switch (v0 >> 4) {
	case 0x00: case 0x01: case 0x02: case 0x03:
	case 0x04: case 0x05: case 0x06: case 0x07: {
	// 0XXXXXXX -- single-byte encoding
	length--;
	if (v0 == 0) {
	// A single zero byte is illegal.
	return throwBadUtf8(v0, at);
	}
	out = (char) v0;
	at++;
	break;
	}
	case 0x0c: case 0x0d: {
	// 110XXXXX -- two-byte encoding
	length -= 2;
	if (length < 0) {
	return throwBadUtf8(v0, at);
	}
	int v1 = bytes.getUnsignedByte(at + 1);
	if ((v1 & 0xc0) != 0x80) {
	return throwBadUtf8(v1, at + 1);
	}
	int value = ((v0 & 0x1f) << 6) \| (v1 & 0x3f);
	if ((value != 0) && (value < 0x80)) {
	/*
	* This should have been represented with
	* one-byte encoding.
	*/
	return throwBadUtf8(v1, at + 1);
	}
	out = (char) value;
	at += 2;
	break;
	}
	case 0x0e: {
	// 1110XXXX -- three-byte encoding
	length -= 3;
	if (length < 0) {
	return throwBadUtf8(v0, at);
	}
	int v1 = bytes.getUnsignedByte(at + 1);
	if ((v1 & 0xc0) != 0x80) {
	return throwBadUtf8(v1, at + 1);
	}
	int v2 = bytes.getUnsignedByte(at + 2);
	if ((v1 & 0xc0) != 0x80) {
	return throwBadUtf8(v2, at + 2);
	}
	int value = ((v0 & 0x0f) << 12) \| ((v1 & 0x3f) << 6) \|
	(v2 & 0x3f);
	if (value < 0x800) {
	/*
	* This should have been represented with one- or
	* two-byte encoding.
	*/
	return throwBadUtf8(v2, at + 2);
	}
	out = (char) value;
	at += 3;
	break;
	}
	default: {
	// 10XXXXXX, 1111XXXX -- illegal
	return throwBadUtf8(v0, at);
	}
	}
	chars[outAt] = out;
	outAt++;
	}

	return new String(chars, 0, outAt);
	}

	/**
	* Helper for {@link #utf8BytesToString}, which throws the right
	* exception for a bogus utf-8 byte.
	*
	* @param value the byte value
	* @param offset the file offset
	* @return never
	* @throws IllegalArgumentException always thrown
	*/
	private static String throwBadUtf8(int value, int offset) {
	throw new IllegalArgumentException("bad utf-8 byte " + Hex.u1(value) +
	" at offset " + Hex.u4(offset));
	}

	/**
	* Constructs an instance from a {@code String}.
	*
	* @param string {@code non-null;} the UTF-8 value as a string
	*/
	public CstString(String string) {
	if (string == null) {
	throw new NullPointerException("string == null");
	}

	this.string = string.intern();
	this.bytes = new ByteArray(stringToUtf8Bytes(string));
	}

	/**
	* Constructs an instance from some UTF-8 bytes.
	*
	* @param bytes {@code non-null;} array of the UTF-8 bytes
	*/
	public CstString(ByteArray bytes) {
	if (bytes == null) {
	throw new NullPointerException("bytes == null");
	}

	this.bytes = bytes;
	this.string = utf8BytesToString(bytes).intern();
	}

	/** {@inheritDoc} */
	@Override
	public boolean equals(Object other) {
	if (!(other instanceof CstString)) {
	return false;
	}

	return string.equals(((CstString) other).string);
	}

	/** {@inheritDoc} */
	@Override
	public int hashCode() {
	return string.hashCode();
	}

	/** {@inheritDoc} */
	@Override
	protected int compareTo0(Constant other) {
	return string.compareTo(((CstString) other).string);
	}

	/** {@inheritDoc} */
	@Override
	public String toString() {
	return "string{\"" + toHuman() + "\"}";
	}

	/** {@inheritDoc} */
	@Override
	public String typeName() {
	return "utf8";
	}

	/** {@inheritDoc} */
	@Override
	public boolean isCategory2() {
	return false;
	}

	/** {@inheritDoc} */
	public String toHuman() {
	int len = string.length();
	StringBuilder sb = new StringBuilder(len * 3 / 2);

	for (int i = 0; i < len; i++) {
	char c = string.charAt(i);
	if ((c >= ' ') && (c < 0x7f)) {
	if ((c == '\'') \|\| (c == '\"') \|\| (c == '\\')) {
	sb.append('\\');
	}
	sb.append(c);
	} else if (c <= 0x7f) {
	switch (c) {
	case '\n': sb.append("\\n"); break;
	case '\r': sb.append("\\r"); break;
	case '\t': sb.append("\\t"); break;
	default: {
	/*
	* Represent the character as an octal escape.
	* If the next character is a valid octal
	* digit, disambiguate by using the
	* three-digit form.
	*/
	char nextChar =
	(i < (len - 1)) ? string.charAt(i + 1) : 0;
	boolean displayZero =
	(nextChar >= '0') && (nextChar <= '7');
	sb.append('\\');
	for (int shift = 6; shift >= 0; shift -= 3) {
	char outChar = (char) (((c >> shift) & 7) + '0');
	if ((outChar != '0') \|\| displayZero) {
	sb.append(outChar);
	displayZero = true;
	}
	}
	if (! displayZero) {
	// Ironic edge case: The original value was 0.
	sb.append('0');
	}
	break;
	}
	}
	} else {
	sb.append("\\u");
	sb.append(Character.forDigit(c >> 12, 16));
	sb.append(Character.forDigit((c >> 8) & 0x0f, 16));
	sb.append(Character.forDigit((c >> 4) & 0x0f, 16));
	sb.append(Character.forDigit(c & 0x0f, 16));
	}
	}

	return sb.toString();
	}

	/**
	* Gets the value as a human-oriented string, surrounded by double
	* quotes.
	*
	* @return {@code non-null;} the quoted string
	*/
	public String toQuoted() {
	return '\"' + toHuman() + '\"';
	}

	/**
	* Gets the value as a human-oriented string, surrounded by double
	* quotes, but ellipsizes the result if it is longer than the given
	* maximum length
	*
	* @param maxLength {@code >= 5;} the maximum length of the string to return
	* @return {@code non-null;} the quoted string
	*/
	public String toQuoted(int maxLength) {
	String string = toHuman();
	int length = string.length();
	String ellipses;

	if (length <= (maxLength - 2)) {
	ellipses = "";
	} else {
	string = string.substring(0, maxLength - 5);
	ellipses = "...";
	}

	return '\"' + string + ellipses + '\"';
	}

	/**
	* Gets the UTF-8 value as a string.
	* The returned string is always already interned.
	*
	* @return {@code non-null;} the UTF-8 value as a string
	*/
	public String getString() {
	return string;
	}

	/**
	* Gets the UTF-8 value as UTF-8 encoded bytes.
	*
	* @return {@code non-null;} an array of the UTF-8 bytes
	*/
	public ByteArray getBytes() {
	return bytes;
	}

	/**
	* Gets the size of this instance as UTF-8 code points. That is,
	* get the number of bytes in the UTF-8 encoding of this instance.
	*
	* @return {@code >= 0;} the UTF-8 size
	*/
	public int getUtf8Size() {
	return bytes.size();
	}

	/**
	* Gets the size of this instance as UTF-16 code points. That is,
	* get the number of 16-bit chars in the UTF-16 encoding of this
	* instance. This is the same as the {@code length} of the
	* Java {@code String} representation of this instance.
	*
	* @return {@code >= 0;} the UTF-16 size
	*/
	public int getUtf16Size() {
	return string.length();
	}

	public Type getType() {
	return Type.STRING;
	}
	}