guava-tests/test/com/google/common/escape/UnicodeEscaperTest.java - platform/external/guava - Git at Google

 /*
  * Copyright (C) 2008 The Guava Authors
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  * http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package com.google.common.escape;

 import com.google.common.annotations.GwtCompatible;

 import junit.framework.TestCase;

 /**
  * Tests for {@link UnicodeEscaper}.
  *
  * <p>Each test drives an anonymous {@code UnicodeEscaper} subclass through
  * {@link Escaper#escape(String)} and checks either the escaped output or the
  * exception raised for malformed UTF-16 input.
  *
  * @author David Beaumont
  */
 @GwtCompatible
 public class UnicodeEscaperTest extends TestCase {

   /** U+10000, the smallest supplementary code point, as a UTF-16 surrogate pair. */
   private static final String SMALLEST_SURROGATE =
       "" + Character.MIN_HIGH_SURROGATE + Character.MIN_LOW_SURROGATE;

   /** U+10FFFF, the largest code point, as a UTF-16 surrogate pair. */
   private static final String LARGEST_SURROGATE =
       "" + Character.MAX_HIGH_SURROGATE + Character.MAX_LOW_SURROGATE;

   /** Mixes NUL, ASCII letters and digits, assorted BMP characters and both extreme pairs. */
   private static final String TEST_STRING =
       "\0abyz\u0080\u0100\u0800\u1000ABYZ\uffff"
           + SMALLEST_SURROGATE + "0189" + LARGEST_SURROGATE;

   /** Escapes nothing: {@code null} is returned for every code point. */
   private static final UnicodeEscaper NOP_ESCAPER = new UnicodeEscaper() {
     @Override
     protected char[] escape(int c) {
       return null;
     }
   };

   /**
    * Escapes everything except [a-zA-Z0-9]; any other code point becomes its decimal
    * value wrapped in square brackets.
    */
   private static final UnicodeEscaper SIMPLE_ESCAPER = new UnicodeEscaper() {
     @Override
     protected char[] escape(int cp) {
       boolean asciiAlphanumeric =
           ('a' <= cp && cp <= 'z')
               || ('A' <= cp && cp <= 'Z')
               || ('0' <= cp && cp <= '9');
       return asciiAlphanumeric ? null : ("[" + cp + "]").toCharArray();
     }
   };

   /** An escaper that never escapes must hand back an equal string. */
   public void testNopEscaper() {
     UnicodeEscaper e = NOP_ESCAPER;
     assertEquals(TEST_STRING, escapeAsString(e, TEST_STRING));
   }

   /** Every non-alphanumeric code point of {@link #TEST_STRING} is replaced by "[decimal]". */
   public void testSimpleEscaper() {
     UnicodeEscaper e = SIMPLE_ESCAPER;
     String expected =
         "[0]abyz[128][256][2048][4096]ABYZ[65535]"
             + "[" + Character.MIN_SUPPLEMENTARY_CODE_POINT + "]"
             + "0189[" + Character.MAX_CODE_POINT + "]";
     assertEquals(expected, escapeAsString(e, TEST_STRING));
   }

   /**
    * Escapes 768 characters that each expand to five or six output characters.
    * Per the original note, the point is to grow past an initial 1024 byte buffer.
    */
   public void testGrowBuffer() {
     // Single-threaded local accumulation, so StringBuilder is sufficient.
     StringBuilder input = new StringBuilder();
     StringBuilder expected = new StringBuilder();
     for (int i = 256; i < 1024; i++) {
       input.append((char) i);
       expected.append('[').append(i).append(']');
     }
     assertEquals(expected.toString(), SIMPLE_ESCAPER.escape(input.toString()));
   }

   /** Well-formed surrogate pairs must reach {@code escape(int)} as single code points. */
   public void testSurrogatePairs() {
     UnicodeEscaper e = SIMPLE_ESCAPER;

     // Build up a range of surrogate pair characters to test
     final int min = Character.MIN_SUPPLEMENTARY_CODE_POINT;
     final int max = Character.MAX_CODE_POINT;
     final int range = max - min;
     final int s1 = min + (1 * range) / 4;
     final int s2 = min + (2 * range) / 4;
     final int s3 = min + (3 * range) / 4;
     final char[] dst = new char[12];

     // Put surrogate pairs at odd indices so they can be split easily
     dst[0] = 'x';
     Character.toChars(min, dst, 1);
     Character.toChars(s1, dst, 3);
     Character.toChars(s2, dst, 5);
     Character.toChars(s3, dst, 7);
     Character.toChars(max, dst, 9);
     dst[11] = 'x';
     String test = new String(dst);

     // Get the expected result string
     String expected =
         "x[" + min + "][" + s1 + "][" + s2 + "][" + s3 + "][" + max + "]x";
     assertEquals(expected, escapeAsString(e, test));
   }

   /** A high surrogate at the very end of the input must be rejected by both escapers. */
   public void testTrailingHighSurrogate() {
     String test = "abc" + Character.MIN_HIGH_SURROGATE;
     assertEscapeRejected(
         NOP_ESCAPER, test, "Trailing high surrogate should cause exception");
     assertEscapeRejected(
         SIMPLE_ESCAPER, test, "Trailing high surrogate should cause exception");
   }

   /** A {@code null} input string must raise {@link NullPointerException}. */
   public void testNullInput() {
     UnicodeEscaper e = SIMPLE_ESCAPER;
     try {
       e.escape((String) null);
       fail("Null string should cause exception");
     } catch (NullPointerException expected) {
       // Pass
     }
   }

   /** A low surrogate with no preceding high surrogate must be rejected wherever it sits. */
   public void testBadStrings() {
     UnicodeEscaper e = SIMPLE_ESCAPER;
     String[] badStrings = {
         String.valueOf(Character.MIN_LOW_SURROGATE),
         Character.MIN_LOW_SURROGATE + "xyz",
         "abc" + Character.MIN_LOW_SURROGATE,
         "abc" + Character.MIN_LOW_SURROGATE + "xyz",
         String.valueOf(Character.MAX_LOW_SURROGATE),
         Character.MAX_LOW_SURROGATE + "xyz",
         "abc" + Character.MAX_LOW_SURROGATE,
         "abc" + Character.MAX_LOW_SURROGATE + "xyz",
     };
     for (String s : badStrings) {
       assertEscapeRejected(
           e, s, "Isolated low surrogate should cause exception [" + s + "]");
     }
   }

   /**
    * The overridden {@code nextEscapeIndex} stops at every letter, yet
    * {@code escape(int)} returns {@code null} for anything outside 'a'..'z'. The
    * expected string shows those false positives being copied through unchanged.
    */
   public void testFalsePositivesForNextEscapedIndex() {
     UnicodeEscaper e = new UnicodeEscaper() {
       // Canonical escaper method that only escapes lower case ASCII letters.
       @Override
       protected char[] escape(int cp) {
         return ('a' <= cp && cp <= 'z')
             ? new char[] { Character.toUpperCase((char) cp) }
             : null;
       }

       // Inefficient implementation that defines all letters as escapable.
       @Override
       protected int nextEscapeIndex(CharSequence csq, int index, int end) {
         while (index < end && !Character.isLetter(csq.charAt(index))) {
           index++;
         }
         return index;
       }
     };
     assertEquals("\0HELLO \uD800\uDC00 WORLD!\n",
         e.escape("\0HeLLo \uD800\uDC00 WorlD!\n"));
   }

   /** Calling {@code codePointAt} with index 4 and end 2 must be rejected. */
   public void testCodePointAt_IndexOutOfBoundsException() {
     try {
       UnicodeEscaper.codePointAt("Testing...", 4, 2);
       fail("Index beyond end should cause exception");
     } catch (IndexOutOfBoundsException expected) {
       // Pass
     }
   }

   /**
    * Asserts that escaping {@code input} with {@code e} throws
    * {@link IllegalArgumentException}; otherwise fails with {@code failureMessage}.
    */
   private static void assertEscapeRejected(
       Escaper e, String input, String failureMessage) {
     try {
       escapeAsString(e, input);
       fail(failureMessage);
     } catch (IllegalArgumentException expected) {
       // Pass
     }
   }

   /** Single point through which the tests invoke {@link Escaper#escape(String)}. */
   private static String escapeAsString(Escaper e, String s) {
     return e.escape(s);
   }
 }
	/*
	* Copyright (C) 2008 The Guava Authors
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package com.google.common.escape;

	import com.google.common.annotations.GwtCompatible;

	import junit.framework.TestCase;

	/**
	* Tests for {@link UnicodeEscaper}.
	*
	* @author David Beaumont
	*/
	@GwtCompatible
	public class UnicodeEscaperTest extends TestCase {

	private static final String SMALLEST_SURROGATE =
	"" + Character.MIN_HIGH_SURROGATE + Character.MIN_LOW_SURROGATE;
	private static final String LARGEST_SURROGATE =
	"" + Character.MAX_HIGH_SURROGATE + Character.MAX_LOW_SURROGATE;

	private static final String TEST_STRING =
	"\0abyz\u0080\u0100\u0800\u1000ABYZ\uffff" +
	SMALLEST_SURROGATE + "0189" + LARGEST_SURROGATE;

	// Escapes nothing
	private static final UnicodeEscaper NOP_ESCAPER = new UnicodeEscaper() {
	@Override
	protected char[] escape(int c) {
	return null;
	}
	};

	// Escapes everything except [a-zA-Z0-9]
	private static final UnicodeEscaper SIMPLE_ESCAPER = new UnicodeEscaper() {
	@Override
	protected char[] escape(int cp) {
	return ('a' <= cp && cp <= 'z') \|\|
	('A' <= cp && cp <= 'Z') \|\|
	('0' <= cp && cp <= '9') ? null :
	("[" + String.valueOf(cp) + "]").toCharArray();
	}
	};

	public void testNopEscaper() {
	UnicodeEscaper e = NOP_ESCAPER;
	assertEquals(TEST_STRING, escapeAsString(e, TEST_STRING));
	}

	public void testSimpleEscaper() {
	UnicodeEscaper e = SIMPLE_ESCAPER;
	String expected =
	"[0]abyz[128][256][2048][4096]ABYZ[65535]" +
	"[" + Character.MIN_SUPPLEMENTARY_CODE_POINT + "]" +
	"0189[" + Character.MAX_CODE_POINT + "]";
	assertEquals(expected, escapeAsString(e, TEST_STRING));
	}

	public void testGrowBuffer() { // need to grow past an initial 1024 byte buffer
	StringBuffer input = new StringBuffer();
	StringBuffer expected = new StringBuffer();
	for (int i = 256; i < 1024; i++) {
	input.append((char) i);
	expected.append("[" + i + "]");
	}
	assertEquals(expected.toString(), SIMPLE_ESCAPER.escape(input.toString()));
	}

	public void testSurrogatePairs() {
	UnicodeEscaper e = SIMPLE_ESCAPER;

	// Build up a range of surrogate pair characters to test
	final int min = Character.MIN_SUPPLEMENTARY_CODE_POINT;
	final int max = Character.MAX_CODE_POINT;
	final int range = max - min;
	final int s1 = min + (1 * range) / 4;
	final int s2 = min + (2 * range) / 4;
	final int s3 = min + (3 * range) / 4;
	final char[] dst = new char[12];

	// Put surrogate pairs at odd indices so they can be split easily
	dst[0] = 'x';
	Character.toChars(min, dst, 1);
	Character.toChars(s1, dst, 3);
	Character.toChars(s2, dst, 5);
	Character.toChars(s3, dst, 7);
	Character.toChars(max, dst, 9);
	dst[11] = 'x';
	String test = new String(dst);

	// Get the expected result string
	String expected =
	"x[" + min + "][" + s1 + "][" + s2 + "][" + s3 + "][" + max + "]x";
	assertEquals(expected, escapeAsString(e, test));
	}

	public void testTrailingHighSurrogate() {
	String test = "abc" + Character.MIN_HIGH_SURROGATE;
	try {
	escapeAsString(NOP_ESCAPER, test);
	fail("Trailing high surrogate should cause exception");
	} catch (IllegalArgumentException expected) {
	// Pass
	}
	try {
	escapeAsString(SIMPLE_ESCAPER, test);
	fail("Trailing high surrogate should cause exception");
	} catch (IllegalArgumentException expected) {
	// Pass
	}
	}

	public void testNullInput() {
	UnicodeEscaper e = SIMPLE_ESCAPER;
	try {
	e.escape((String) null);
	fail("Null string should cause exception");
	} catch (NullPointerException expected) {
	// Pass
	}
	}

	public void testBadStrings() {
	UnicodeEscaper e = SIMPLE_ESCAPER;
	String[] BAD_STRINGS = {
	String.valueOf(Character.MIN_LOW_SURROGATE),
	Character.MIN_LOW_SURROGATE + "xyz",
	"abc" + Character.MIN_LOW_SURROGATE,
	"abc" + Character.MIN_LOW_SURROGATE + "xyz",
	String.valueOf(Character.MAX_LOW_SURROGATE),
	Character.MAX_LOW_SURROGATE + "xyz",
	"abc" + Character.MAX_LOW_SURROGATE,
	"abc" + Character.MAX_LOW_SURROGATE + "xyz",
	};
	for (String s : BAD_STRINGS) {
	try {
	escapeAsString(e, s);
	fail("Isolated low surrogate should cause exception [" + s + "]");
	} catch (IllegalArgumentException expected) {
	// Pass
	}
	}
	}

	public void testFalsePositivesForNextEscapedIndex() {
	UnicodeEscaper e = new UnicodeEscaper() {
	// Canonical escaper method that only escapes lower case ASCII letters.
	@Override
	protected char[] escape(int cp) {
	return ('a' <= cp && cp <= 'z') ?
	new char[] { Character.toUpperCase((char) cp) } : null;
	}
	// Inefficient implementation that defines all letters as escapable.
	@Override
	protected int nextEscapeIndex(CharSequence csq, int index, int end) {
	while (index < end && !Character.isLetter(csq.charAt(index))) {
	index++;
	}
	return index;
	}
	};
	assertEquals("\0HELLO \uD800\uDC00 WORLD!\n",
	e.escape("\0HeLLo \uD800\uDC00 WorlD!\n"));
	}

	public void testCodePointAt_IndexOutOfBoundsException() {
	try {
	UnicodeEscaper.codePointAt("Testing...", 4, 2);
	fail();
	} catch (IndexOutOfBoundsException expected) {
	}
	}

	private String escapeAsString(Escaper e, String s) {
	return e.escape(s);
	}
	}