| /* |
| * Copyright (C) 2015 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License |
| */ |
| |
| package libcore.net; |
| |
| import java.io.ByteArrayOutputStream; |
| import java.net.URISyntaxException; |
| import java.nio.ByteBuffer; |
| import java.nio.CharBuffer; |
| import java.nio.charset.CharacterCodingException; |
| import java.nio.charset.Charset; |
| import java.nio.charset.CharsetDecoder; |
| import java.nio.charset.CharsetEncoder; |
| import java.nio.charset.CoderResult; |
| import java.nio.charset.CodingErrorAction; |
| import java.nio.charset.StandardCharsets; |
| |
/**
 * Encodes and decodes "application/x-www-form-urlencoded" content.
 *
 * <p>Subclasses define {@link #isRetained}, which decides which chars need to be escaped and
 * which don't. Output is encoded as UTF-8 by default, i.e. each character (or surrogate pair) is
 * converted to its equivalent UTF-8 encoded byte sequence, which is then converted to its
 * escaped form. E.g. a 4 byte sequence might look like "%F0%9F%98%80". Hex digits produced by
 * the encoder are always upper case; the decoder and validator accept both cases.
 */
public abstract class UriCodec {
    /**
     * Character written to the output in place of input that cannot be decoded (U+FFFD).
     */
    private static final char INVALID_INPUT_CHARACTER = '\ufffd';

    /**
     * Returns true iff {@code c} does not need to be escaped.
     * 'a' - 'z', 'A' - 'Z' and '0' - '9' are always considered valid (i.e. don't need to be
     * escaped) regardless of what this method returns. This set is referred to as the
     * "whitelist".
     */
    protected abstract boolean isRetained(char c);

    /** Returns true iff {@code c} is an ASCII letter or an ASCII digit. */
    private static boolean isWhitelisted(char c) {
        return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9');
    }

    /** Returns true iff {@code c} may appear unescaped according to this codec. */
    private boolean isWhitelistedOrRetained(char c) {
        return isWhitelisted(c) || isRetained(c);
    }

    /**
     * Throws {@link URISyntaxException} if any of the characters in the range [start, end) are
     * not valid according to this codec.
     * <ul>
     *   <li>A char that is in the whitelist or retained is valid unescaped.
     *   <li>A '%' must be followed, before {@code end}, by two hex digits (either case). The
     *       value of the escaped octet is not checked.
     *   <li>Any other char is invalid.
     * </ul>
     *
     * @param uri the string containing the range to validate
     * @param start index of the first char to validate (inclusive)
     * @param end index one past the last char to validate (exclusive)
     * @param name not used, except to generate debugging info; may be null
     * @return the substring [start, end) on success
     * @throws URISyntaxException on the first invalid char or truncated escape; its index is the
     *         position of the offending char in {@code uri}
     */
    public final String validate(String uri, int start, int end, String name)
            throws URISyntaxException {
        int i = start;
        while (i < end) {
            char c = uri.charAt(i++);
            if (isWhitelistedOrRetained(c)) {
                continue;
            }
            // c is either '%' or a character not allowed in a uri.
            if (c != '%') {
                throw unexpectedCharacterException(uri, name, c, i - 1);
            }
            // Expect two characters representing a number in hex.
            for (int j = 0; j < 2; j++) {
                c = getNextCharacter(uri, i++, end, name);
                if (hexCharToValue(c) < 0) {
                    throw unexpectedCharacterException(uri, name, c, i - 1);
                }
            }
        }
        return uri.substring(start, end);
    }

    /**
     * Interprets a char as a hex digit, returning a number from 0 to 15, or -1 if {@code c} is
     * not one of '0'-'9', 'a'-'f' or 'A'-'F'.
     */
    private static int hexCharToValue(char c) {
        if ('0' <= c && c <= '9') {
            return c - '0';
        }
        if ('a' <= c && c <= 'f') {
            return 10 + c - 'a';
        }
        if ('A' <= c && c <= 'F') {
            return 10 + c - 'A';
        }
        return -1;
    }

    /**
     * Returns the " in [name]" fragment used in error messages, or the empty string when no
     * name was supplied.
     */
    private static String describeName(String name) {
        return (name == null) ? "" : " in [" + name + "]";
    }

    /** Builds (does not throw) the exception reporting {@code unexpected} at {@code index}. */
    private static URISyntaxException unexpectedCharacterException(
            String uri, String name, char unexpected, int index) {
        return new URISyntaxException(
                uri, "Unexpected character" + describeName(name) + ": " + unexpected, index);
    }

    /**
     * Returns {@code uri.charAt(index)}.
     *
     * @throws URISyntaxException if {@code index} is at or beyond {@code end}
     */
    private static char getNextCharacter(String uri, int index, int end, String name)
            throws URISyntaxException {
        if (index >= end) {
            throw new URISyntaxException(
                    uri, "Unexpected end of string" + describeName(name), index);
        }
        return uri.charAt(index);
    }

    /**
     * Throws {@link URISyntaxException} if any character in {@code uri} is neither whitelisted
     * nor in {@code legal}. Percent escapes get no special treatment here: '%' is only accepted
     * if it appears in {@code legal}.
     */
    public static void validateSimple(String uri, String legal) throws URISyntaxException {
        for (int i = 0; i < uri.length(); i++) {
            char c = uri.charAt(i);
            if (!isWhitelisted(c) && legal.indexOf(c) < 0) {
                throw unexpectedCharacterException(uri, null /* name */, c, i);
            }
        }
    }

    /**
     * Encodes the string {@code s} as per the rules of this encoder (see class level comment),
     * using {@code charset} to turn escaped characters into bytes.
     *
     * @return the encoded string
     * @throws IllegalArgumentException if the encoder is unable to encode a sequence of chars
     *         (e.g. an unpaired surrogate or a char that {@code charset} cannot represent)
     */
    public final String encode(String s, Charset charset) {
        StringBuilder builder = new StringBuilder(s.length());
        appendEncoded(builder, s, charset, false);
        return builder.toString();
    }

    /**
     * Encodes the string {@code s} as per the rules of this encoder (see class level comment).
     *
     * <p>Encoded output is appended to {@code builder}. This uses the default output encoding
     * (UTF-8). Any '%' in the input is escaped like every other non-retained char.
     *
     * @throws IllegalArgumentException if a sequence of chars cannot be encoded
     */
    public final void appendEncoded(StringBuilder builder, String s) {
        appendEncoded(builder, s, StandardCharsets.UTF_8, false);
    }

    /**
     * Encodes the string {@code s} as per the rules of this encoder (see class level comment).
     *
     * <p>Encoded output is appended to {@code builder}. This uses the default output encoding
     * (UTF-8). This method produces partially encoded output: every '%' in the input is passed
     * through unmodified instead of being escaped, so already-encoded octets are not double
     * escaped. Consider an encoder operating on the global whitelist dealing with the string
     * "foo%25bar". With this method, the output will be "foo%25bar", but with
     * {@link #appendEncoded(StringBuilder, String)} it will be double encoded into "foo%2525bar".
     * The chars following a '%' are not checked to be hex digits.
     *
     * @throws IllegalArgumentException if a sequence of chars cannot be encoded
     */
    public final void appendPartiallyEncoded(StringBuilder builder, String s) {
        appendEncoded(builder, s, StandardCharsets.UTF_8, true);
    }

    /**
     * Shared implementation of the encoding entry points.
     *
     * <p>Chars that must be escaped are queued in a buffer and only converted to bytes when the
     * next unescaped char (or the end of input) is reached, so that surrogate pairs are handed
     * to the charset encoder together.
     *
     * @param partiallyEncoded if true, '%' is copied to the output verbatim
     */
    private void appendEncoded(
            StringBuilder builder, String s, Charset charset, boolean partiallyEncoded) {
        CharsetEncoder encoder = charset.newEncoder()
                .onMalformedInput(CodingErrorAction.REPORT)
                .onUnmappableCharacter(CodingErrorAction.REPORT);
        CharBuffer cBuffer = CharBuffer.allocate(s.length());
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            if (c == '%' && partiallyEncoded) {
                // In case there are characters waiting to be encoded.
                flushEncodingCharBuffer(builder, encoder, cBuffer);
                builder.append('%');
                continue;
            }

            // A retained space is written as '+' rather than as a literal space.
            if (c == ' ' && isRetained(' ')) {
                flushEncodingCharBuffer(builder, encoder, cBuffer);
                builder.append('+');
                continue;
            }

            if (isWhitelistedOrRetained(c)) {
                flushEncodingCharBuffer(builder, encoder, cBuffer);
                builder.append(c);
                continue;
            }

            // Put the character in the queue for encoding.
            cBuffer.put(c);
        }
        flushEncodingCharBuffer(builder, encoder, cBuffer);
    }

    /**
     * Encodes the chars queued in {@code cBuffer}, appends each resulting byte to
     * {@code builder} as "%XX" (upper case hex), and leaves {@code cBuffer} empty and ready to
     * be written to again. Does nothing if no chars are queued.
     *
     * @throws IllegalArgumentException if encoding or flushing the encoder does not complete
     *         cleanly
     */
    private static void flushEncodingCharBuffer(
            StringBuilder builder,
            CharsetEncoder encoder,
            CharBuffer cBuffer) {
        if (cBuffer.position() == 0) {
            return;
        }
        // We are reading from the buffer now.
        cBuffer.flip();
        ByteBuffer byteBuffer = ByteBuffer.allocate(
                cBuffer.remaining() * (int) Math.ceil(encoder.maxBytesPerChar()));
        byteBuffer.position(0);
        CoderResult result = encoder.encode(cBuffer, byteBuffer, true /* endOfInput */);
        // According to the {@code CharsetEncoder#encode} spec, the method returns underflow
        // and leaves an empty input buffer when all chars were processed correctly.
        if (result != CoderResult.UNDERFLOW) {
            throw new IllegalArgumentException(
                    "Error encoding, unexpected result ["
                            + result.toString()
                            + "] using encoder for ["
                            + encoder.charset().name()
                            + "]");
        }
        if (cBuffer.hasRemaining()) {
            throw new IllegalArgumentException(
                    "Encoder for [" + encoder.charset().name() + "] failed with underflow with "
                            + "remaining input [" + cBuffer + "]");
        }
        // Need to flush in case the encoder saves internal state. The result of the flush itself
        // must be checked: re-testing the result of encode() would never detect a failed flush.
        result = encoder.flush(byteBuffer);
        if (result != CoderResult.UNDERFLOW) {
            throw new IllegalArgumentException(
                    "Error encoding, unexpected result ["
                            + result.toString()
                            + "] flushing encoder for ["
                            + encoder.charset().name()
                            + "]");
        }
        encoder.reset();

        byteBuffer.flip();
        // Write the encoded bytes.
        while (byteBuffer.hasRemaining()) {
            byte b = byteBuffer.get();
            builder.append('%');
            builder.append(intToHexDigit((b & 0xf0) >>> 4));
            builder.append(intToHexDigit(b & 0x0f));
        }
        // Use the character buffer to write again.
        cBuffer.flip();
        cBuffer.limit(cBuffer.capacity());
    }

    /** Converts a value in the range 0-15 to its upper case hex digit. */
    private static char intToHexDigit(int b) {
        if (b < 10) {
            return (char) ('0' + b);
        } else {
            return (char) ('A' + b - 10);
        }
    }

    /**
     * Decodes a string according to the rules of this decoder.
     *
     * <ul>
     *   <li>If {@code convertPlus == true}, all '+' chars in the input are converted to ' '
     *       (white space) in the output; otherwise they are copied unchanged.
     *   <li>If {@code throwOnFailure == true}, an {@link IllegalArgumentException} is thrown for
     *       a truncated escape, a non-hex digit in an escape, or bytes that cannot be mapped to
     *       a char. Otherwise U+FFFD is emitted to the output in place of the invalid input; a
     *       truncated escape at the end of the input additionally ends decoding.
     *   <li>Regardless of {@code throwOnFailure}, escaped byte sequences that are malformed for
     *       {@code charset} are replaced by U+FFFD by the charset decoder.
     * </ul>
     *
     * @return the decoded string
     */
    public static String decode(
            String s, boolean convertPlus, Charset charset, boolean throwOnFailure) {
        StringBuilder builder = new StringBuilder(s.length());
        appendDecoded(builder, s, convertPlus, charset, throwOnFailure);
        return builder.toString();
    }

    /**
     * Implementation of {@link #decode(String, boolean, Charset, boolean)}; appends the decoded
     * form of {@code s} to {@code builder}.
     *
     * <p>Consecutive escaped octets are accumulated and decoded together so that multi-byte
     * sequences are interpreted as a unit.
     */
    private static void appendDecoded(
            StringBuilder builder,
            String s,
            boolean convertPlus,
            Charset charset,
            boolean throwOnFailure) {
        CharsetDecoder decoder = charset.newDecoder()
                .onMalformedInput(CodingErrorAction.REPLACE)
                .replaceWith(String.valueOf(INVALID_INPUT_CHARACTER))
                .onUnmappableCharacter(CodingErrorAction.REPORT);
        // Holds the bytes corresponding to the escaped chars being read (empty if the last char
        // wasn't an escaped char).
        ByteBuffer byteBuffer = ByteBuffer.allocate(s.length());
        int i = 0;
        while (i < s.length()) {
            char c = s.charAt(i);
            i++;
            switch (c) {
                case '+':
                    flushDecodingByteAccumulator(
                            builder, decoder, byteBuffer, throwOnFailure);
                    builder.append(convertPlus ? ' ' : '+');
                    break;
                case '%': {
                    // Expect two characters representing a number in hex.
                    int hexValue = 0;
                    boolean validEscape = true;
                    for (int j = 0; j < 2 && validEscape; j++) {
                        try {
                            c = getNextCharacter(s, i, s.length(), null /* name */);
                        } catch (URISyntaxException e) {
                            // Unexpected end of input.
                            if (throwOnFailure) {
                                throw new IllegalArgumentException(e);
                            } else {
                                flushDecodingByteAccumulator(
                                        builder, decoder, byteBuffer, throwOnFailure);
                                builder.append(INVALID_INPUT_CHARACTER);
                                return;
                            }
                        }
                        i++;
                        int newDigit = hexCharToValue(c);
                        if (newDigit < 0) {
                            if (throwOnFailure) {
                                throw new IllegalArgumentException(
                                        unexpectedCharacterException(
                                                s, null /* name */, c, i - 1));
                            }
                            // The escape (including the offending char) is replaced by a single
                            // U+FFFD; decoding resumes with the char after it.
                            flushDecodingByteAccumulator(
                                    builder, decoder, byteBuffer, throwOnFailure);
                            builder.append(INVALID_INPUT_CHARACTER);
                            validEscape = false;
                        } else {
                            hexValue = hexValue * 0x10 + newDigit;
                        }
                    }
                    // Only a complete, well-formed escape contributes a byte. Storing the
                    // partial value of a rejected escape would emit a bogus extra character.
                    if (validEscape) {
                        byteBuffer.put((byte) hexValue);
                    }
                    break;
                }
                default:
                    flushDecodingByteAccumulator(builder, decoder, byteBuffer, throwOnFailure);
                    builder.append(c);
            }
        }
        flushDecodingByteAccumulator(builder, decoder, byteBuffer, throwOnFailure);
    }

    /**
     * Decodes the bytes accumulated in {@code byteBuffer}, appends the result to
     * {@code builder}, and leaves {@code byteBuffer} empty and ready to be written to again.
     * Does nothing if no bytes are accumulated.
     *
     * @throws IllegalArgumentException if decoding fails and {@code throwOnFailure} is true; if
     *         it is false a single U+FFFD is appended instead
     */
    private static void flushDecodingByteAccumulator(
            StringBuilder builder,
            CharsetDecoder decoder,
            ByteBuffer byteBuffer,
            boolean throwOnFailure) {
        if (byteBuffer.position() == 0) {
            return;
        }
        byteBuffer.flip();
        try {
            builder.append(decoder.decode(byteBuffer));
        } catch (CharacterCodingException e) {
            if (throwOnFailure) {
                throw new IllegalArgumentException(e);
            } else {
                builder.append(INVALID_INPUT_CHARACTER);
            }
        } finally {
            // Use the byte buffer to write again.
            byteBuffer.flip();
            byteBuffer.limit(byteBuffer.capacity());
        }
    }

    /**
     * Equivalent to {@code decode(s, false, UTF_8, true)}.
     */
    public static String decode(String s) {
        return decode(
                s, false /* convertPlus */, StandardCharsets.UTF_8, true /* throwOnFailure */);
    }
}