test/jdk/java/net/URLEncoder/SurrogatePairs.java - platform/libcore - Git at Google

 /*
  * Copyright (c) 2001, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 only, as
  * published by the Free Software Foundation.
  *
  * This code is distributed in the hope that it will be useful, but WITHOUT
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  * version 2 for more details (a copy is included in the LICENSE file that
  * accompanied this code).
  *
  * You should have received a copy of the GNU General Public License version
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  * or visit www.oracle.com if you need additional information or have any
  * questions.
  */

 /*
  * @test
  * @bug 4396708
  * @summary Test URL encoder and decoder on a string that contains
  * surrogate pairs.
  *
  */

 import java.io.*;
 import java.net.*;

 /*
  * Surrogate pairs are two-character Unicode sequences where the first
  * character lies in the range [d800, dbff] and the second character lies
  * in the range [dc00, dfff]. They are used as an escaping mechanism to add
  * 1M more characters to Unicode.
  */
 public class SurrogatePairs {

     /**
      * Inputs to encode. Every entry contains one surrogate pair, covering
      * the four corners of the surrogate range (U+10000, U+103FF, U+10FC00
      * and U+10FFFF) as well as pairs preceded or followed by an unreserved
      * ASCII character, a reserved ASCII character, and a two-byte BMP
      * character. Entry i is checked against {@code correctEncodings[i]}.
      */
     static String[] testStrings = {"\uD800\uDC00",
                                    "\uD800\uDFFF",
                                    "\uDBFF\uDC00",
                                    "\uDBFF\uDFFF",
                                    "1\uDBFF\uDC00",
                                    "@\uDBFF\uDC00",
                                    "\uDBFF\uDC001",
                                    "\uDBFF\uDC00@",
                                    "\u0101\uDBFF\uDC00",
                                    "\uDBFF\uDC00\u0101"
     };

     /**
      * Expected percent-encoded UTF-8 form of each entry of
      * {@code testStrings}, in the same order. Each surrogate pair must be
      * encoded as a single four-byte UTF-8 sequence.
      */
     static String[] correctEncodings = {"%F0%90%80%80",
                                         "%F0%90%8F%BF",
                                         "%F4%8F%B0%80",
                                         "%F4%8F%BF%BF",
                                         "1%F4%8F%B0%80",
                                         "%40%F4%8F%B0%80",
                                         "%F4%8F%B0%801",
                                         "%F4%8F%B0%80%40",
                                         "%C4%81%F4%8F%B0%80",
                                         "%F4%8F%B0%80%C4%81"
     };

     /**
      * Runs the encode/decode round trip for every test string.
      *
      * @param args ignored
      * @throws Exception if the two tables differ in length, or if any
      *         string encodes or decodes incorrectly
      */
     public static void main(String[] args) throws Exception {

         // The tables are indexed in parallel; fail loudly rather than
         // skipping cases or dying with an ArrayIndexOutOfBoundsException.
         if (testStrings.length != correctEncodings.length) {
             throw new Exception("testStrings has " + testStrings.length
                                 + " entries but correctEncodings has "
                                 + correctEncodings.length);
         }

         for (int i = 0; i < testStrings.length; i++) {
             test(testStrings[i], correctEncodings[i]);
         }
     }

     /**
      * Encodes {@code str} as UTF-8 with URLEncoder, checks the result
      * against {@code correctEncoding}, then decodes it with URLDecoder and
      * checks that the original string comes back. Progress is printed to
      * standard output.
      *
      * @param str the string to encode
      * @param correctEncoding the expected URL encoding of {@code str}
      * @throws Exception if the encoding or the decoding does not match
      */
     private static void test(String str, String correctEncoding)
         throws Exception {

         System.out.println("Unicode bytes of test string are: "
                            + getHexBytes(str));

         String encoded = URLEncoder.encode(str, "UTF-8");

         System.out.println("URLEncoding is: " + encoded);

         if (!encoded.equals(correctEncoding)) {
             throw new Exception("The encoding is incorrect!" +
                                 " It should be " + correctEncoding);
         }
         System.out.println("The encoding is correct!");

         String decoded = URLDecoder.decode(encoded, "UTF-8");

         System.out.println("Unicode bytes for URLDecoding are: "
                            + getHexBytes(decoded));

         if (!str.equals(decoded)) {
             throw new Exception("The decoded is not equal to the original");
         }
         System.out.println("The decoding is correct");
         System.out.println("---");
     }

     /**
      * Formats each UTF-16 code unit of {@code s} as four lowercase hex
      * digits followed by a space, e.g. {@code "d800 dc00 "}.
      *
      * @param s the string to dump
      * @return the hex dump; empty if {@code s} is empty
      */
     private static String getHexBytes(String s) {
         // Five output characters per code unit: four digits and a space.
         StringBuilder sb = new StringBuilder(s.length() * 5);
         for (int i = 0; i < s.length(); i++) {
             // OR-ing in 0x10000 forces a fifth leading digit so that
             // toHexString keeps the leading zeros; substring(1) drops it.
             sb.append(Integer.toHexString(0x10000 | s.charAt(i)).substring(1));
             sb.append(' ');
         }
         return sb.toString();
     }

 }
	/*
	* Copyright (c) 2001, Oracle and/or its affiliates. All rights reserved.
	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
	*
	* This code is free software; you can redistribute it and/or modify it
	* under the terms of the GNU General Public License version 2 only, as
	* published by the Free Software Foundation.
	*
	* This code is distributed in the hope that it will be useful, but WITHOUT
	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
	* version 2 for more details (a copy is included in the LICENSE file that
	* accompanied this code).
	*
	* You should have received a copy of the GNU General Public License version
	* 2 along with this work; if not, write to the Free Software Foundation,
	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
	*
	* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
	* or visit www.oracle.com if you need additional information or have any
	* questions.
	*/

	/*
	* @test
	* @bug 4396708
	* @summary Test URL encoder and decoder on a string that contains
	* surrogate pairs.
	*
	*/

	import java.io.*;
	import java.net.*;

	/*
	* Surrogate pairs are two-character Unicode sequences where the first
	* character lies in the range [d800, dbff] and the second character lies
	* in the range [dc00, dfff]. They are used as an escaping mechanism to add
	* 1M more characters to Unicode.
	*/
	public class SurrogatePairs {

		/**
		 * Inputs to encode. Every entry contains one surrogate pair, covering
		 * the four corners of the surrogate range (U+10000, U+103FF, U+10FC00
		 * and U+10FFFF) as well as pairs preceded or followed by an unreserved
		 * ASCII character, a reserved ASCII character, and a two-byte BMP
		 * character. Entry i is checked against {@code correctEncodings[i]}.
		 */
		static String[] testStrings = {
			"\uD800\uDC00",
			"\uD800\uDFFF",
			"\uDBFF\uDC00",
			"\uDBFF\uDFFF",
			"1\uDBFF\uDC00",
			"@\uDBFF\uDC00",
			"\uDBFF\uDC001",
			"\uDBFF\uDC00@",
			"\u0101\uDBFF\uDC00",
			"\uDBFF\uDC00\u0101"
		};

		/**
		 * Expected percent-encoded UTF-8 form of each entry of
		 * {@code testStrings}, in the same order. Each surrogate pair must be
		 * encoded as a single four-byte UTF-8 sequence.
		 */
		static String[] correctEncodings = {
			"%F0%90%80%80",
			"%F0%90%8F%BF",
			"%F4%8F%B0%80",
			"%F4%8F%BF%BF",
			"1%F4%8F%B0%80",
			"%40%F4%8F%B0%80",
			"%F4%8F%B0%801",
			"%F4%8F%B0%80%40",
			"%C4%81%F4%8F%B0%80",
			"%F4%8F%B0%80%C4%81"
		};

		/**
		 * Runs the encode/decode round trip for every test string.
		 *
		 * @param args ignored
		 * @throws Exception if the two tables differ in length, or if any
		 *         string encodes or decodes incorrectly
		 */
		public static void main(String[] args) throws Exception {

			// The tables are indexed in parallel; fail loudly rather than
			// skipping cases or dying with an ArrayIndexOutOfBoundsException.
			if (testStrings.length != correctEncodings.length) {
				throw new Exception("testStrings has " + testStrings.length
					+ " entries but correctEncodings has "
					+ correctEncodings.length);
			}

			for (int i = 0; i < testStrings.length; i++) {
				test(testStrings[i], correctEncodings[i]);
			}
		}

		/**
		 * Encodes {@code str} as UTF-8 with URLEncoder, checks the result
		 * against {@code correctEncoding}, then decodes it with URLDecoder and
		 * checks that the original string comes back. Progress is printed to
		 * standard output.
		 *
		 * @param str the string to encode
		 * @param correctEncoding the expected URL encoding of {@code str}
		 * @throws Exception if the encoding or the decoding does not match
		 */
		private static void test(String str, String correctEncoding)
			throws Exception {

			System.out.println("Unicode bytes of test string are: "
				+ getHexBytes(str));

			String encoded = URLEncoder.encode(str, "UTF-8");

			System.out.println("URLEncoding is: " + encoded);

			if (!encoded.equals(correctEncoding)) {
				throw new Exception("The encoding is incorrect!" +
					" It should be " + correctEncoding);
			}
			System.out.println("The encoding is correct!");

			String decoded = URLDecoder.decode(encoded, "UTF-8");

			System.out.println("Unicode bytes for URLDecoding are: "
				+ getHexBytes(decoded));

			if (!str.equals(decoded)) {
				throw new Exception("The decoded is not equal to the original");
			}
			System.out.println("The decoding is correct");
			System.out.println("---");
		}

		/**
		 * Formats each UTF-16 code unit of {@code s} as four lowercase hex
		 * digits followed by a space, e.g. {@code "d800 dc00 "}.
		 *
		 * @param s the string to dump
		 * @return the hex dump; empty if {@code s} is empty
		 */
		private static String getHexBytes(String s) {
			// Five output characters per code unit: four digits and a space.
			StringBuilder sb = new StringBuilder(s.length() * 5);
			for (int i = 0; i < s.length(); i++) {
				// OR-ing in 0x10000 forces a fifth leading digit so that
				// toHexString keeps the leading zeros; substring(1) drops it.
				sb.append(Integer.toHexString(0x10000 | s.charAt(i)).substring(1));
				sb.append(' ');
			}
			return sb.toString();
		}

	}