blob: 240e91091727ce3e0ff1c093ffb4340222890f32 [file] [log] [blame]
/*
* Copyright (C) 2015 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License
*/
package libcore.net;
import java.io.ByteArrayOutputStream;
import java.net.URISyntaxException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
/**
* Encodes and decodes "application/x-www-form-urlencoded" content.
*
* Subclasses define "isRetained", which decides which chars need to be escaped and which don't.
* Output is encoded as UTF-8 by default, i.e. each character (or surrogate pair) is converted to
* its equivalent UTF-8 encoded byte sequence, which is then converted to its escaped form,
* e.g. a 4 byte sequence might look like "%c6%ef%e0%e8".
*/
public abstract class UriCodec {

    /**
     * Character to be output when there's an error decoding an input.
     */
    private static final char INVALID_INPUT_CHARACTER = '\ufffd';

    /**
     * Returns true iff {@code c} does not need to be escaped.
     * 'a' - 'z', 'A' - 'Z' and '0' - '9' are always considered valid (i.e. they never need to be
     * escaped, whatever this method returns). This set is referred to as the "whitelist".
     */
    protected abstract boolean isRetained(char c);

    /** Returns true iff {@code c} is an ASCII letter or an ASCII digit. */
    private static boolean isWhitelisted(char c) {
        return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9');
    }

    /** Returns true iff {@code c} can be written to the output without being escaped. */
    private boolean isWhitelistedOrRetained(char c) {
        return isWhitelisted(c) || isRetained(c);
    }

    /**
     * Throws {@link URISyntaxException} if any of the characters in the range [start, end) are
     * not valid according to this codec.
     * - If a char is in the whitelist or retained, it is valid both escaped and unescaped.
     * - All escaped octets appearing in the input must be structurally valid, i.e. a '%' must be
     *   followed by two hex digits that lie inside [start, end).
     *
     * On success, the substring [start, end) is returned.
     * {@code name} is not used, except to generate debugging info.
     *
     * @param uri the string holding the range to validate
     * @param start index of the first char to validate (inclusive)
     * @param end index just past the last char to validate (exclusive)
     * @param name label added to the exception message; may be null
     * @return {@code uri.substring(start, end)}
     * @throws URISyntaxException on the first illegal char or truncated escape sequence
     */
    public final String validate(String uri, int start, int end, String name)
            throws URISyntaxException {
        int i = start;
        while (i < end) {
            char c = uri.charAt(i++);
            if (isWhitelistedOrRetained(c)) {
                continue;
            }
            // c is either '%' or a character not allowed in a uri.
            if (c != '%') {
                throw unexpectedCharacterException(uri, name, c, i - 1);
            }
            // Expect two characters representing a number in hex.
            for (int j = 0; j < 2; j++) {
                c = getNextCharacter(uri, i++, end, name);
                if (hexCharToValue(c) < 0) {
                    throw unexpectedCharacterException(uri, name, c, i - 1);
                }
            }
        }
        return uri.substring(start, end);
    }

    /**
     * Interprets a char as a hex digit, returning a number from -1 (invalid char) to 15 ('f').
     */
    private static int hexCharToValue(char c) {
        if ('0' <= c && c <= '9') {
            return c - '0';
        }
        if ('a' <= c && c <= 'f') {
            return 10 + c - 'a';
        }
        if ('A' <= c && c <= 'F') {
            return 10 + c - 'A';
        }
        return -1;
    }

    /**
     * Builds (but does not throw) the exception reporting that {@code unexpected} was found at
     * {@code index} of {@code uri}. {@code name} is only used in the message and may be null.
     */
    private static URISyntaxException unexpectedCharacterException(
            String uri, String name, char unexpected, int index) {
        String nameString = (name == null) ? "" : " in [" + name + "]";
        return new URISyntaxException(
                uri, "Unexpected character" + nameString + ": " + unexpected, index);
    }

    /**
     * Returns {@code uri.charAt(index)}, or throws {@link URISyntaxException} if {@code index}
     * is at or past {@code end}. {@code name} is only used in the message and may be null.
     */
    private static char getNextCharacter(String uri, int index, int end, String name)
            throws URISyntaxException {
        if (index >= end) {
            String nameString = (name == null) ? "" : " in [" + name + "]";
            throw new URISyntaxException(
                    uri, "Unexpected end of string" + nameString, index);
        }
        return uri.charAt(index);
    }

    /**
     * Throws {@link URISyntaxException} if any character in {@code uri} is neither whitelisted nor
     * in {@code legal}.
     */
    public static void validateSimple(String uri, String legal) throws URISyntaxException {
        for (int i = 0; i < uri.length(); i++) {
            char c = uri.charAt(i);
            if (!isWhitelisted(c) && legal.indexOf(c) < 0) {
                throw unexpectedCharacterException(uri, null /* name */, c, i);
            }
        }
    }

    /**
     * Encodes the string {@code s} as per the rules of this encoder (see class level comment),
     * converting escaped characters to bytes with {@code charset}.
     *
     * @throws IllegalArgumentException if the encoder is unable to encode a sequence of chars.
     */
    public final String encode(String s, Charset charset) {
        StringBuilder builder = new StringBuilder(s.length());
        appendEncoded(builder, s, charset, false);
        return builder.toString();
    }

    /**
     * Encodes the string {@code s} as per the rules of this encoder (see class level comment).
     *
     * Encoded output is appended to {@code builder}. This uses the default output encoding (UTF-8).
     */
    public final void appendEncoded(StringBuilder builder, String s) {
        appendEncoded(builder, s, StandardCharsets.UTF_8, false);
    }

    /**
     * Encodes the string {@code s} as per the rules of this encoder (see class level comment).
     *
     * Encoded output is appended to {@code builder}. This uses the default output encoding (UTF-8).
     * This method produces partially encoded output. What this means is that if encoded octets
     * appear in the input string, they are passed through unmodified, instead of being double
     * escaped. Consider an encoder operating on the global whitelist dealing with a string
     * "foo%25bar". With this method, the output will be "foo%25bar", but with appendEncoded, it
     * will be double encoded into "foo%2525bar".
     */
    public final void appendPartiallyEncoded(StringBuilder builder, String s) {
        appendEncoded(builder, s, StandardCharsets.UTF_8, true);
    }

    /**
     * Shared implementation of the encoding methods.
     *
     * Chars that need escaping are queued in a buffer so that a run of them (for instance a
     * surrogate pair) is handed to the charset encoder in one piece; the queue is flushed
     * whenever a char that is written verbatim is reached.
     *
     * @param partiallyEncoded if true, '%' is copied to the output instead of being escaped
     */
    private void appendEncoded(
            StringBuilder builder, String s, Charset charset, boolean partiallyEncoded) {
        CharsetEncoder encoder = charset.newEncoder()
                .onMalformedInput(CodingErrorAction.REPORT)
                .onUnmappableCharacter(CodingErrorAction.REPORT);
        CharBuffer cBuffer = CharBuffer.allocate(s.length());
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            if (c == '%' && partiallyEncoded) {
                // In case there are characters waiting to be encoded.
                flushEncodingCharBuffer(builder, encoder, cBuffer);
                builder.append('%');
                continue;
            }
            // A retained space is written in its form-encoded shape.
            if (c == ' ' && isRetained(' ')) {
                flushEncodingCharBuffer(builder, encoder, cBuffer);
                builder.append('+');
                continue;
            }
            if (isWhitelistedOrRetained(c)) {
                flushEncodingCharBuffer(builder, encoder, cBuffer);
                builder.append(c);
                continue;
            }
            // Put the character in the queue for encoding.
            cBuffer.put(c);
        }
        flushEncodingCharBuffer(builder, encoder, cBuffer);
    }

    /**
     * Encodes the chars queued in {@code cBuffer}, appends every resulting byte to
     * {@code builder} as "%XX" (upper case hex) and leaves {@code cBuffer} empty and ready to be
     * written to again. Does nothing if no chars are queued.
     *
     * @throws IllegalArgumentException if the queued chars cannot be encoded
     */
    private static void flushEncodingCharBuffer(
            StringBuilder builder,
            CharsetEncoder encoder,
            CharBuffer cBuffer) {
        if (cBuffer.position() == 0) {
            return;
        }
        // We are reading from the buffer now.
        cBuffer.flip();
        ByteBuffer byteBuffer = ByteBuffer.allocate(
                cBuffer.remaining() * (int) Math.ceil(encoder.maxBytesPerChar()));
        CoderResult result = encoder.encode(cBuffer, byteBuffer, true /* endOfInput */);
        // According to the {@code CharsetEncoder#encode} spec, the method returns underflow
        // and leaves an empty input when all chars were processed correctly.
        if (!result.isUnderflow()) {
            throw new IllegalArgumentException(
                    "Error encoding, unexpected result ["
                            + result.toString()
                            + "] using encoder for ["
                            + encoder.charset().name()
                            + "]");
        }
        if (cBuffer.hasRemaining()) {
            throw new IllegalArgumentException(
                    "Encoder for [" + encoder.charset().name() + "] failed with underflow with "
                            + "remaining input [" + cBuffer + "]");
        }
        // Need to flush in case the encoder saves internal state. The result of the flush itself
        // has to be inspected: checking the result of encode() again would never detect a failure.
        result = encoder.flush(byteBuffer);
        if (!result.isUnderflow()) {
            throw new IllegalArgumentException(
                    "Error encoding, unexpected result ["
                            + result.toString()
                            + "] flushing encoder for ["
                            + encoder.charset().name()
                            + "]");
        }
        encoder.reset();
        byteBuffer.flip();
        // Write the encoded bytes.
        while (byteBuffer.hasRemaining()) {
            byte b = byteBuffer.get();
            builder.append('%');
            builder.append(intToHexDigit((b & 0xf0) >>> 4));
            builder.append(intToHexDigit(b & 0x0f));
        }
        // Use the character buffer to write again.
        cBuffer.clear();
    }

    /** Converts a value in the range [0, 15] to its upper case hex digit. */
    private static char intToHexDigit(int b) {
        if (b < 10) {
            return (char) ('0' + b);
        } else {
            return (char) ('A' + b - 10);
        }
    }

    /**
     * Decode a string according to the rules of this decoder.
     *
     * - if {@code convertPlus == true} all '+' chars in the input are converted to ' '
     *   (white space)
     * - if {@code throwOnFailure == true}, an {@link IllegalArgumentException} is thrown for
     *   truncated escape sequences, non-hex digits after a '%' and unmappable byte sequences.
     *   Else, U+FFFD is emitted to the output in place of the invalid input.
     * - byte sequences that are malformed for {@code charset} are replaced by U+FFFD in both
     *   modes.
     */
    public static String decode(
            String s, boolean convertPlus, Charset charset, boolean throwOnFailure) {
        StringBuilder builder = new StringBuilder(s.length());
        appendDecoded(builder, s, convertPlus, charset, throwOnFailure);
        return builder.toString();
    }

    /**
     * Decodes {@code s} into {@code builder}; see {@link #decode(String, boolean, Charset,
     * boolean)} for the meaning of the flags.
     *
     * Consecutive escaped octets are accumulated and decoded together, so that multi-byte
     * characters are reassembled; the accumulator is flushed whenever an unescaped char is read.
     */
    private static void appendDecoded(
            StringBuilder builder,
            String s,
            boolean convertPlus,
            Charset charset,
            boolean throwOnFailure) {
        CharsetDecoder decoder = charset.newDecoder()
                .onMalformedInput(CodingErrorAction.REPLACE)
                .replaceWith("\ufffd")
                .onUnmappableCharacter(CodingErrorAction.REPORT);
        // Holds the bytes corresponding to the escaped chars being read (empty if the last char
        // wasn't an escaped char).
        ByteBuffer byteBuffer = ByteBuffer.allocate(s.length());
        int i = 0;
        while (i < s.length()) {
            char c = s.charAt(i);
            i++;
            switch (c) {
                case '+':
                    flushDecodingByteAccumulator(
                            builder, decoder, byteBuffer, throwOnFailure);
                    builder.append(convertPlus ? ' ' : '+');
                    break;
                case '%': {
                    // Expect two characters representing a number in hex.
                    byte hexValue = 0;
                    boolean validEscape = true;
                    for (int j = 0; j < 2; j++) {
                        try {
                            c = getNextCharacter(s, i, s.length(), null /* name */);
                        } catch (URISyntaxException e) {
                            // Unexpected end of input.
                            if (throwOnFailure) {
                                throw new IllegalArgumentException(e);
                            }
                            flushDecodingByteAccumulator(
                                    builder, decoder, byteBuffer, throwOnFailure);
                            builder.append(INVALID_INPUT_CHARACTER);
                            return;
                        }
                        i++;
                        int newDigit = hexCharToValue(c);
                        if (newDigit < 0) {
                            if (throwOnFailure) {
                                throw new IllegalArgumentException(
                                        unexpectedCharacterException(
                                                s, null /* name */, c, i - 1));
                            }
                            flushDecodingByteAccumulator(
                                    builder, decoder, byteBuffer, throwOnFailure);
                            builder.append(INVALID_INPUT_CHARACTER);
                            validEscape = false;
                            break;
                        }
                        hexValue = (byte) (hexValue * 0x10 + newDigit);
                    }
                    // Only a complete, well-formed escape contributes a byte. The replacement
                    // character has already been emitted for a broken one, so queueing the
                    // partially computed value would add a bogus char to the output.
                    if (validEscape) {
                        byteBuffer.put(hexValue);
                    }
                    break;
                }
                default:
                    flushDecodingByteAccumulator(builder, decoder, byteBuffer, throwOnFailure);
                    builder.append(c);
            }
        }
        flushDecodingByteAccumulator(builder, decoder, byteBuffer, throwOnFailure);
    }

    /**
     * Decodes the bytes accumulated in {@code byteBuffer}, appends the result to {@code builder}
     * and leaves {@code byteBuffer} empty and ready to be written to again. Does nothing if no
     * bytes are accumulated.
     *
     * If the decoder reports an error, an {@link IllegalArgumentException} is thrown when
     * {@code throwOnFailure} is true; otherwise U+FFFD is appended instead of the decoded text.
     */
    private static void flushDecodingByteAccumulator(
            StringBuilder builder,
            CharsetDecoder decoder,
            ByteBuffer byteBuffer,
            boolean throwOnFailure) {
        if (byteBuffer.position() == 0) {
            return;
        }
        byteBuffer.flip();
        try {
            builder.append(decoder.decode(byteBuffer));
        } catch (CharacterCodingException e) {
            if (throwOnFailure) {
                throw new IllegalArgumentException(e);
            } else {
                builder.append(INVALID_INPUT_CHARACTER);
            }
        } finally {
            // Use the byte buffer to write again.
            byteBuffer.clear();
        }
    }

    /**
     * Equivalent to {@code decode(s, false, UTF_8, true)}
     */
    public static String decode(String s) {
        return decode(
                s, false /* convertPlus */, StandardCharsets.UTF_8, true /* throwOnFailure */);
    }
}