guava-tests/test/com/google/common/escape/ArrayBasedUnicodeEscaperTest.java - platform/external/guava - Git at Google

 /*
  * Copyright (C) 2009 The Guava Authors
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  * http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package com.google.common.escape;

 import com.google.common.annotations.GwtCompatible;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.escape.testing.EscaperAsserts;

 import junit.framework.TestCase;

 import java.io.IOException;
 import java.util.Map;

 /**
  * @author David Beaumont
  */
 @GwtCompatible
 public class ArrayBasedUnicodeEscaperTest extends TestCase {
   private static final Map<Character, String> NO_REPLACEMENTS =
       ImmutableMap.of();
   private static final Map<Character, String> SIMPLE_REPLACEMENTS =
       ImmutableMap.of(
           '\n', "<newline>",
           '\t', "<tab>",
           '&', "<and>");
   private static final char[] NO_CHARS = new char[0];

   public void testReplacements() throws IOException {
     // In reality this is not a very sensible escaper to have (if you are only
     // escaping elements from a map you would use a ArrayBasedCharEscaper).
     UnicodeEscaper escaper = new ArrayBasedUnicodeEscaper(SIMPLE_REPLACEMENTS,
         Character.MIN_VALUE, Character.MAX_CODE_POINT, null) {
           @Override protected char[] escapeUnsafe(int c) {
             return NO_CHARS;
           }
     };
     EscaperAsserts.assertBasic(escaper);
     assertEquals("<tab>Fish <and> Chips<newline>",
         escaper.escape("\tFish & Chips\n"));

     // Verify that everything else is left unescaped.
     String safeChars = "\0\u0100\uD800\uDC00\uFFFF";
     assertEquals(safeChars, escaper.escape(safeChars));

     // Ensure that Unicode escapers behave correctly wrt badly formed input.
     String badUnicode = "\uDC00\uD800";
     try {
       escaper.escape(badUnicode);
       fail("should fail for bad Unicode");
     } catch (IllegalArgumentException e) {
       // Pass
     }
   }

   public void testSafeRange() throws IOException {
     // Basic escaping of unsafe chars (wrap them in {,}'s)
     UnicodeEscaper wrappingEscaper =
         new ArrayBasedUnicodeEscaper(NO_REPLACEMENTS, 'A', 'Z', null) {
           @Override protected char[] escapeUnsafe(int c) {
             return ("{" + (char) c + "}").toCharArray();
           }
         };
     EscaperAsserts.assertBasic(wrappingEscaper);
     // '[' and '@' lie either side of [A-Z].
     assertEquals("{[}FOO{@}BAR{]}", wrappingEscaper.escape("[FOO@BAR]"));
   }

   public void testDeleteUnsafeChars() throws IOException {
     UnicodeEscaper deletingEscaper =
         new ArrayBasedUnicodeEscaper(NO_REPLACEMENTS, ' ', '~', null) {
           @Override protected char[] escapeUnsafe(int c) {
             return NO_CHARS;
           }
         };
     EscaperAsserts.assertBasic(deletingEscaper);
     assertEquals("Everything outside the printable ASCII range is deleted.",
         deletingEscaper.escape("\tEverything\0 outside the\uD800\uDC00 " +
             "printable ASCII \uFFFFrange is \u007Fdeleted.\n"));
   }

   public void testReplacementPriority() throws IOException {
     UnicodeEscaper replacingEscaper =
         new ArrayBasedUnicodeEscaper(SIMPLE_REPLACEMENTS, ' ', '~', null) {
           private final char[] unknown = new char[] { '?' };
           @Override protected char[] escapeUnsafe(int c) {
             return unknown;
           }
         };
     EscaperAsserts.assertBasic(replacingEscaper);

     // Replacements are applied first regardless of whether the character is in
     // the safe range or not ('&' is a safe char while '\t' and '\n' are not).
     assertEquals("<tab>Fish <and>? Chips?<newline>",
         replacingEscaper.escape("\tFish &\0 Chips\r\n"));
   }

   public void testCodePointsFromSurrogatePairs() throws IOException {
     UnicodeEscaper surrogateEscaper =
         new ArrayBasedUnicodeEscaper(NO_REPLACEMENTS, 0, 0x20000, null) {
           private final char[] escaped = new char[] { 'X' };
           @Override protected char[] escapeUnsafe(int c) {
             return escaped;
           }
         };
     EscaperAsserts.assertBasic(surrogateEscaper);

     // A surrogate pair defining a code point within the safe range.
     String safeInput = "\uD800\uDC00";  // 0x10000
     assertEquals(safeInput, surrogateEscaper.escape(safeInput));

     // A surrogate pair defining a code point outside the safe range (but both
     // of the surrogate characters lie within the safe range). It is important
     // not to accidentally treat this as a sequence of safe characters.
     String unsafeInput = "\uDBFF\uDFFF";  // 0x10FFFF
     assertEquals("X", surrogateEscaper.escape(unsafeInput));
   }
 }
	/*
	* Copyright (C) 2009 The Guava Authors
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package com.google.common.escape;

	import com.google.common.annotations.GwtCompatible;
	import com.google.common.collect.ImmutableMap;
	import com.google.common.escape.testing.EscaperAsserts;

	import junit.framework.TestCase;

	import java.io.IOException;
	import java.util.Map;

	/**
	* @author David Beaumont
	*/
	@GwtCompatible
	public class ArrayBasedUnicodeEscaperTest extends TestCase {
	private static final Map<Character, String> NO_REPLACEMENTS =
	ImmutableMap.of();
	private static final Map<Character, String> SIMPLE_REPLACEMENTS =
	ImmutableMap.of(
	'\n', "<newline>",
	'\t', "<tab>",
	'&', "<and>");
	private static final char[] NO_CHARS = new char[0];

	public void testReplacements() throws IOException {
	// In reality this is not a very sensible escaper to have (if you are only
	// escaping elements from a map you would use a ArrayBasedCharEscaper).
	UnicodeEscaper escaper = new ArrayBasedUnicodeEscaper(SIMPLE_REPLACEMENTS,
	Character.MIN_VALUE, Character.MAX_CODE_POINT, null) {
	@Override protected char[] escapeUnsafe(int c) {
	return NO_CHARS;
	}
	};
	EscaperAsserts.assertBasic(escaper);
	assertEquals("<tab>Fish <and> Chips<newline>",
	escaper.escape("\tFish & Chips\n"));

	// Verify that everything else is left unescaped.
	String safeChars = "\0\u0100\uD800\uDC00\uFFFF";
	assertEquals(safeChars, escaper.escape(safeChars));

	// Ensure that Unicode escapers behave correctly wrt badly formed input.
	String badUnicode = "\uDC00\uD800";
	try {
	escaper.escape(badUnicode);
	fail("should fail for bad Unicode");
	} catch (IllegalArgumentException e) {
	// Pass
	}
	}

	public void testSafeRange() throws IOException {
	// Basic escaping of unsafe chars (wrap them in {,}'s)
	UnicodeEscaper wrappingEscaper =
	new ArrayBasedUnicodeEscaper(NO_REPLACEMENTS, 'A', 'Z', null) {
	@Override protected char[] escapeUnsafe(int c) {
	return ("{" + (char) c + "}").toCharArray();
	}
	};
	EscaperAsserts.assertBasic(wrappingEscaper);
	// '[' and '@' lie either side of [A-Z].
	assertEquals("{[}FOO{@}BAR{]}", wrappingEscaper.escape("[FOO@BAR]"));
	}

	public void testDeleteUnsafeChars() throws IOException {
	UnicodeEscaper deletingEscaper =
	new ArrayBasedUnicodeEscaper(NO_REPLACEMENTS, ' ', '~', null) {
	@Override protected char[] escapeUnsafe(int c) {
	return NO_CHARS;
	}
	};
	EscaperAsserts.assertBasic(deletingEscaper);
	assertEquals("Everything outside the printable ASCII range is deleted.",
	deletingEscaper.escape("\tEverything\0 outside the\uD800\uDC00 " +
	"printable ASCII \uFFFFrange is \u007Fdeleted.\n"));
	}

	public void testReplacementPriority() throws IOException {
	UnicodeEscaper replacingEscaper =
	new ArrayBasedUnicodeEscaper(SIMPLE_REPLACEMENTS, ' ', '~', null) {
	private final char[] unknown = new char[] { '?' };
	@Override protected char[] escapeUnsafe(int c) {
	return unknown;
	}
	};
	EscaperAsserts.assertBasic(replacingEscaper);

	// Replacements are applied first regardless of whether the character is in
	// the safe range or not ('&' is a safe char while '\t' and '\n' are not).
	assertEquals("<tab>Fish <and>? Chips?<newline>",
	replacingEscaper.escape("\tFish &\0 Chips\r\n"));
	}

	public void testCodePointsFromSurrogatePairs() throws IOException {
	UnicodeEscaper surrogateEscaper =
	new ArrayBasedUnicodeEscaper(NO_REPLACEMENTS, 0, 0x20000, null) {
	private final char[] escaped = new char[] { 'X' };
	@Override protected char[] escapeUnsafe(int c) {
	return escaped;
	}
	};
	EscaperAsserts.assertBasic(surrogateEscaper);

	// A surrogate pair defining a code point within the safe range.
	String safeInput = "\uD800\uDC00"; // 0x10000
	assertEquals(safeInput, surrogateEscaper.escape(safeInput));

	// A surrogate pair defining a code point outside the safe range (but both
	// of the surrogate characters lie within the safe range). It is important
	// not to accidentally treat this as a sequence of safe characters.
	String unsafeInput = "\uDBFF\uDFFF"; // 0x10FFFF
	assertEquals("X", surrogateEscaper.escape(unsafeInput));
	}
	}