blob: 6ecac913401e5cc1d7b217d681adb10155ff6226 [file] [log] [blame]
/*
* Copyright (C) 2009 The Guava Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.common.escape;
import com.google.common.annotations.GwtCompatible;
import com.google.common.collect.ImmutableMap;
import com.google.common.escape.testing.EscaperAsserts;
import junit.framework.TestCase;
import java.io.IOException;
import java.util.Map;
/**
* @author David Beaumont
*/
@GwtCompatible
public class ArrayBasedUnicodeEscaperTest extends TestCase {
private static final Map<Character, String> NO_REPLACEMENTS =
ImmutableMap.of();
private static final Map<Character, String> SIMPLE_REPLACEMENTS =
ImmutableMap.of(
'\n', "<newline>",
'\t', "<tab>",
'&', "<and>");
private static final char[] NO_CHARS = new char[0];
public void testReplacements() throws IOException {
// In reality this is not a very sensible escaper to have (if you are only
// escaping elements from a map you would use a ArrayBasedCharEscaper).
UnicodeEscaper escaper = new ArrayBasedUnicodeEscaper(SIMPLE_REPLACEMENTS,
Character.MIN_VALUE, Character.MAX_CODE_POINT, null) {
@Override protected char[] escapeUnsafe(int c) {
return NO_CHARS;
}
};
EscaperAsserts.assertBasic(escaper);
assertEquals("<tab>Fish <and> Chips<newline>",
escaper.escape("\tFish & Chips\n"));
// Verify that everything else is left unescaped.
String safeChars = "\0\u0100\uD800\uDC00\uFFFF";
assertEquals(safeChars, escaper.escape(safeChars));
// Ensure that Unicode escapers behave correctly wrt badly formed input.
String badUnicode = "\uDC00\uD800";
try {
escaper.escape(badUnicode);
fail("should fail for bad Unicode");
} catch (IllegalArgumentException e) {
// Pass
}
}
public void testSafeRange() throws IOException {
// Basic escaping of unsafe chars (wrap them in {,}'s)
UnicodeEscaper wrappingEscaper =
new ArrayBasedUnicodeEscaper(NO_REPLACEMENTS, 'A', 'Z', null) {
@Override protected char[] escapeUnsafe(int c) {
return ("{" + (char) c + "}").toCharArray();
}
};
EscaperAsserts.assertBasic(wrappingEscaper);
// '[' and '@' lie either side of [A-Z].
assertEquals("{[}FOO{@}BAR{]}", wrappingEscaper.escape("[FOO@BAR]"));
}
public void testDeleteUnsafeChars() throws IOException {
UnicodeEscaper deletingEscaper =
new ArrayBasedUnicodeEscaper(NO_REPLACEMENTS, ' ', '~', null) {
@Override protected char[] escapeUnsafe(int c) {
return NO_CHARS;
}
};
EscaperAsserts.assertBasic(deletingEscaper);
assertEquals("Everything outside the printable ASCII range is deleted.",
deletingEscaper.escape("\tEverything\0 outside the\uD800\uDC00 " +
"printable ASCII \uFFFFrange is \u007Fdeleted.\n"));
}
public void testReplacementPriority() throws IOException {
UnicodeEscaper replacingEscaper =
new ArrayBasedUnicodeEscaper(SIMPLE_REPLACEMENTS, ' ', '~', null) {
private final char[] unknown = new char[] { '?' };
@Override protected char[] escapeUnsafe(int c) {
return unknown;
}
};
EscaperAsserts.assertBasic(replacingEscaper);
// Replacements are applied first regardless of whether the character is in
// the safe range or not ('&' is a safe char while '\t' and '\n' are not).
assertEquals("<tab>Fish <and>? Chips?<newline>",
replacingEscaper.escape("\tFish &\0 Chips\r\n"));
}
public void testCodePointsFromSurrogatePairs() throws IOException {
UnicodeEscaper surrogateEscaper =
new ArrayBasedUnicodeEscaper(NO_REPLACEMENTS, 0, 0x20000, null) {
private final char[] escaped = new char[] { 'X' };
@Override protected char[] escapeUnsafe(int c) {
return escaped;
}
};
EscaperAsserts.assertBasic(surrogateEscaper);
// A surrogate pair defining a code point within the safe range.
String safeInput = "\uD800\uDC00"; // 0x10000
assertEquals(safeInput, surrogateEscaper.escape(safeInput));
// A surrogate pair defining a code point outside the safe range (but both
// of the surrogate characters lie within the safe range). It is important
// not to accidentally treat this as a sequence of safe characters.
String unsafeInput = "\uDBFF\uDFFF"; // 0x10FFFF
assertEquals("X", surrogateEscaper.escape(unsafeInput));
}
}