| // Protocol Buffers - Google's data interchange format |
| // Copyright 2008 Google Inc. All rights reserved. |
| // http://code.google.com/p/protobuf/ |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are |
| // met: |
| // |
| // * Redistributions of source code must retain the above copyright |
| // notice, this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above |
| // copyright notice, this list of conditions and the following disclaimer |
| // in the documentation and/or other materials provided with the |
| // distribution. |
| // * Neither the name of Google Inc. nor the names of its |
| // contributors may be used to endorse or promote products derived from |
| // this software without specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| package com.google.protobuf.nano; |
| |
| import java.io.UnsupportedEncodingException; |
| |
| /** |
| * The classes contained within are used internally by the Protocol Buffer |
| * library and generated message implementations. They are public only because |
| * those generated messages do not reside in the {@code protobuf} package. |
| * Others should not use this class directly. |
| * |
| * @author kenton@google.com (Kenton Varda) |
| */ |
public class InternalNano {
  /** Charset name used to map each char of a generated literal to one byte. */
  private static final String ISO_8859_1 = "ISO-8859-1";

  /** Charset name used for the actual text encoding of string fields. */
  private static final String UTF_8 = "UTF-8";

  /**
   * Helper called by generated code to construct default values for string
   * fields.
   * <p>
   * The protocol compiler does not actually contain a UTF-8 decoder -- it
   * just pushes UTF-8-encoded text around without touching it.  The one place
   * where this presents a problem is when generating Java string literals.
   * Unicode characters in the string literal would normally need to be encoded
   * using a Unicode escape sequence, which would require decoding them.
   * To get around this, protoc instead embeds the UTF-8 bytes into the
   * generated code and leaves it to the runtime library to decode them.
   * <p>
   * It gets worse, though.  If protoc just generated a byte array, like:
   * <pre>{@code new byte[] {0x12, 0x34, 0x56, 0x78}}</pre>
   * Java actually generates *code* which allocates an array and then fills
   * in each value.  This is much less efficient than just embedding the bytes
   * directly into the bytecode.  To get around this, we need another
   * work-around.  String literals are embedded directly, so protoc actually
   * generates a string literal corresponding to the bytes.  The easiest way
   * to do this is to use the ISO-8859-1 character set, which corresponds to
   * the first 256 characters of the Unicode range.  Protoc can then use
   * good old CEscape to generate the string.
   * <p>
   * So we have a string literal which represents a set of bytes which
   * represents another string.  This function -- stringDefaultValue --
   * converts from the generated string to the string we actually want.  The
   * generated code calls this automatically.
   *
   * @param bytes string literal whose chars each stand for one UTF-8 byte
   * @return the string obtained by encoding {@code bytes} as ISO-8859-1 and
   *     decoding the resulting bytes as UTF-8
   * @throws IllegalStateException if the JVM rejects either charset name
   */
  public static final String stringDefaultValue(String bytes) {
    try {
      // Step 1: recover the raw bytes; step 2: decode them as UTF-8 text.
      return new String(bytes.getBytes(ISO_8859_1), UTF_8);
    } catch (UnsupportedEncodingException e) {
      // This should never happen since all JVMs are required to implement
      // both of the above character sets.
      throw new IllegalStateException(
          "Java VM does not support a standard character set.", e);
    }
  }

  /**
   * Helper called by generated code to construct default values for bytes
   * fields.
   * <p>
   * This is a lot like {@link #stringDefaultValue}, but for bytes fields.
   * In this case we only need the second of the two hacks -- allowing us to
   * embed raw bytes as a string literal with ISO-8859-1 encoding.
   *
   * @param bytes string literal whose chars each stand for one byte
   * @return the ISO-8859-1 encoding of {@code bytes}
   * @throws IllegalStateException if the JVM rejects the charset name
   */
  public static final byte[] bytesDefaultValue(String bytes) {
    try {
      return bytes.getBytes(ISO_8859_1);
    } catch (UnsupportedEncodingException e) {
      // This should never happen since all JVMs are required to implement
      // ISO-8859-1.
      throw new IllegalStateException(
          "Java VM does not support a standard character set.", e);
    }
  }

  /**
   * Helper function to convert a string into UTF-8 while turning the
   * UnsupportedEncodingException to a RuntimeException.
   *
   * @param text the string to encode
   * @return the UTF-8 encoding of {@code text}
   * @throws RuntimeException if the JVM rejects the UTF-8 charset name; the
   *     original {@link UnsupportedEncodingException} is attached as the cause
   */
  public static final byte[] copyFromUtf8(final String text) {
    try {
      return text.getBytes(UTF_8);
    } catch (UnsupportedEncodingException e) {
      // Keep the underlying exception as the cause so the failure stays
      // diagnosable, matching the other helpers in this class.
      throw new RuntimeException("UTF-8 not supported?", e);
    }
  }
}