// blob: 8fdf6245b4f7ad781bc5038f65752c5530df9571 [file] [log] [blame]
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use:
/*
 * Copyright (C) 2003-2011, International Business Machines Corporation and
 * others. All Rights Reserved.
 */
/**
 * @author ram
 *
 * To change the template for this generated type comment go to
 * Window>Preferences>Java>Code Generation>Code and Comments
 */
public class IDNAReference {
/** The ACE prefix "xn--" as UTF-16 code units; never reassigned, hence final. */
private static final char[] ACE_PREFIX = new char[]{ 0x0078,0x006E,0x002d,0x002d } ;
/** Number of code units in {@link #ACE_PREFIX}. */
private static final int ACE_PREFIX_LENGTH = 4;
/** Upper bound applied to the length of a converted label. */
private static final int MAX_LABEL_LENGTH = 63;
/** '-' (U+002D). */
private static final int HYPHEN = 0x002D;
/** 'A' (U+0041), lower bound of the ASCII upper-case range. */
private static final int CAPITAL_A = 0x0041;
/** 'Z' (U+005A), upper bound of the ASCII upper-case range. */
private static final int CAPITAL_Z = 0x005A;
/** Distance added to an ASCII upper-case letter to obtain its lower-case form. */
private static final int LOWER_CASE_DELTA = 0x0020;
/** '.' (U+002E). */
private static final int FULL_STOP = 0x002E;

/** Option value with no option bits set. */
public static final int DEFAULT = 0x0000;
/** Option bit 0x0001; forwarded to the NamePrep transform together with the other options. */
public static final int ALLOW_UNASSIGNED = 0x0001;
/** Option bit 0x0002; when set, the STD 3 ASCII rules are enforced during conversion to ASCII. */
public static final int USE_STD3_RULES = 0x0002;

/** Shared NamePrep transform used by the conversion methods of this class. */
public static final NamePrepTransform transform = NamePrepTransform.getInstance();
public static boolean isReady() {
return transform.isReady();
private static boolean startsWithPrefix(StringBuffer src){
boolean startsWithPrefix = true;
if(src.length() < ACE_PREFIX_LENGTH){
return false;
for(int i=0; i<ACE_PREFIX_LENGTH;i++){
if(toASCIILower(src.charAt(i)) != ACE_PREFIX[i]){
startsWithPrefix = false;
return startsWithPrefix;
private static char toASCIILower(char ch){
if(CAPITAL_A <= ch && ch <= CAPITAL_Z){
return (char)(ch + LOWER_CASE_DELTA);
return ch;
private static StringBuffer toASCIILower(StringBuffer src){
StringBuffer dest = new StringBuffer();
for(int i=0; i<src.length();i++){
return dest;
private static int compareCaseInsensitiveASCII(StringBuffer s1, StringBuffer s2){
char c1,c2;
int rc;
for(int i =0;/* no condition */;i++) {
/* If we reach the ends of both strings then they match */
if(i == s1.length()) {
return 0;
c1 = s1.charAt(i);
c2 = s2.charAt(i);
/* Case-insensitive comparison */
if(c1!=c2) {
if(rc!=0) {
return rc;
// Scans src from index start up to (but excluding) limit and returns an index;
// callers in this file use the result as the end of the current label.
// NOTE(review): in this copy the loop body returns unconditionally on its first
// iteration and the braces are missing. The test that presumably recognised a
// label separator at src[start] has been lost — TODO restore from upstream.
private static int getSeparatorIndex(char[] src,int start, int limit){
for(; start<limit;start++){
return start;
// we have not found the separator just return length
return start;
private static boolean isLDHChar(int ch){
// high runner case
return false;
//[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
if( (ch==0x002D) ||
(0x0030 <= ch && ch <= 0x0039) ||
(0x0041 <= ch && ch <= 0x005A) ||
(0x0061 <= ch && ch <= 0x007A)
return true;
return false;
public static StringBuffer convertToASCII(String src, int options)
throws StringPrepParseException{
UCharacterIterator iter = UCharacterIterator.getInstance(src);
return convertToASCII(iter,options);
public static StringBuffer convertToASCII(StringBuffer src, int options)
throws StringPrepParseException{
UCharacterIterator iter = UCharacterIterator.getInstance(src);
return convertToASCII(iter,options);
/**
 * Converts a single label, supplied through a character iterator, to its
 * ASCII form by following the numbered steps in the comments below.
 *
 * NOTE(review): this copy of the method is visibly incomplete. Closing braces,
 * several conditions and some statements are missing (see the NOTE comments
 * inside), so the documentation covers only what is still present.
 *
 * @param srcIter iterator over the label to convert
 * @param options option bits; USE_STD3_RULES is tested here and the whole
 *                value is forwarded to transform.prepare
 * @return the converted label (the local dest buffer)
 * @throws StringPrepParseException with the messages visible below: zero-length
 *         label, STD 3 ASCII rule violation, ACE-prefix check, label too long
 */
public static StringBuffer convertToASCII(UCharacterIterator srcIter, int options)
throws StringPrepParseException{
// never assigned in the visible code; passed as-is to PunycodeReference.encode
char[] caseFlags = null;
// the source contains all ascii codepoints
boolean srcIsASCII = true;
// assume the source contains all LDH codepoints
boolean srcIsLDH = true;
//get the options
boolean useSTD3ASCIIRules = ((options & USE_STD3_RULES) != 0);
int ch;
// step 1
// NOTE(review): the right-hand side of the assignment to ch is missing;
// presumably it read the next code unit from srcIter — TODO restore.
while((ch =!= UCharacterIterator.DONE){
if(ch> 0x7f){
srcIsASCII = false;
// index of the most recent non-LDH character found in step 3 & 4, -1 if none
int failPos = -1;
StringBuffer processOut = null;
// step 2 is performed only if the source contains non ASCII
// step 2
// NOTE(review): both assignments below are unconditional in this copy; the
// branch that presumably chose between them based on srcIsASCII is missing.
processOut = transform.prepare(srcIter,options);
processOut = new StringBuffer(srcIter.getText());
int poLen = processOut.length();
// NOTE(review): the guard for this throw is missing; given the message it
// presumably tested for an empty processOut — TODO confirm.
throw new StringPrepParseException("Found zero length lable after NamePrep.",StringPrepParseException.ZERO_LENGTH_LABEL);
StringBuffer dest = new StringBuffer();
// reset the variable to verify if output of prepare is ASCII or not
srcIsASCII = true;
// step 3 & 4
// NOTE(review): ch is not assigned inside this loop in the visible code;
// presumably it was read from processOut at index j — TODO restore.
for(int j=0;j<poLen;j++ ){
if(ch > 0x7F){
srcIsASCII = false;
}else if(isLDHChar(ch)==false){
// here we do not assemble surrogates
// since we know that LDH code points
// are in the ASCII range only
srcIsLDH = false;
failPos = j;
if(useSTD3ASCIIRules == true){
// verify 3a and 3b
if( srcIsLDH == false /* source contains some non-LDH characters */
|| processOut.charAt(0) == HYPHEN
|| processOut.charAt(processOut.length()-1) == HYPHEN){
/* populate the parseError struct */
// NOTE(review): the throw statements in this section are truncated; the
// arguments between the message and the position expression are missing.
throw new StringPrepParseException( "The input does not conform to the STD 3 ASCII rules",
(failPos>0) ? (failPos-1) : failPos);
}else if(processOut.charAt(0) == HYPHEN){
throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
(poLen>0) ? poLen-1 : poLen);
// NOTE(review): the condition guarding this assignment is missing; presumably
// it is the all-ASCII case where no encoding is needed — TODO confirm.
dest = processOut;
// step 5 : verify the sequence does not begin with ACE prefix
//step 6: encode the sequence with punycode
StringBuffer punyout = PunycodeReference.encode(processOut,caseFlags);
// convert all codepoints to lower case ASCII
StringBuffer lowerOut = toASCIILower(punyout);
//Step 7: prepend the ACE prefix
//Step 6: copy the contents in b2 into dest
// NOTE(review): the statements for the two steps above and the guard and
// remaining arguments of the throw below are missing from this copy.
throw new StringPrepParseException("The input does not start with the ACE Prefix.",
// NOTE(review): the check uses MAX_LABEL_LENGTH (63) while the message text
// says "Length > 64"; the throw is also truncated.
if(dest.length() > MAX_LABEL_LENGTH){
throw new StringPrepParseException("The labels in the input are too long. Length > 64.",
return dest;
public static StringBuffer convertIDNtoASCII(UCharacterIterator iter,int options)
throws StringPrepParseException{
return convertIDNToASCII(iter.getText(), options);
public static StringBuffer convertIDNtoASCII(StringBuffer str,int options)
throws StringPrepParseException{
return convertIDNToASCII(str.toString(), options);
/**
 * Converts a domain name to ASCII by cutting it into labels at the indices
 * reported by getSeparatorIndex.
 *
 * NOTE(review): this copy is incomplete. The loop over the labels, the call
 * that converts each label and appends it to result, and the closing braces
 * are missing, so the visible code handles at most the first label and never
 * writes to result — TODO restore from upstream.
 *
 * @param src     the domain name to convert
 * @param options option bits (not used by any statement visible here)
 * @return the result buffer
 * @throws StringPrepParseException declared; no visible statement throws it
 */
public static StringBuffer convertIDNToASCII(String src,int options)
throws StringPrepParseException{
char[] srcArr = src.toCharArray();
StringBuffer result = new StringBuffer();
// sepIndex: end (exclusive) of the current label; oldSepIndex: its start
int sepIndex=0;
int oldSepIndex = 0;
sepIndex = getSeparatorIndex(srcArr,sepIndex,srcArr.length);
String label = new String(srcArr,oldSepIndex,sepIndex-oldSepIndex);
//make sure this is not a root label separator.
if(!(label.length()==0 && sepIndex==srcArr.length)){
// NOTE(review): iter is created but not used in the visible code; presumably
// it was passed to convertToASCII and the output appended to result.
UCharacterIterator iter = UCharacterIterator.getInstance(label);
// increment the sepIndex to skip past the separator
// NOTE(review): the increment the comment above describes is not present.
oldSepIndex = sepIndex;
return result;
public static StringBuffer convertToUnicode(String src, int options)
throws StringPrepParseException{
UCharacterIterator iter = UCharacterIterator.getInstance(src);
return convertToUnicode(iter,options);
public static StringBuffer convertToUnicode(StringBuffer src, int options)
throws StringPrepParseException{
UCharacterIterator iter = UCharacterIterator.getInstance(src);
return convertToUnicode(iter,options);
/**
 * Converts a single label, supplied through a character iterator, to its
 * Unicode form by following the numbered steps in the comments below. The
 * final statement returns the iterator's original text.
 *
 * NOTE(review): this copy of the method is visibly incomplete. Closing braces,
 * the catch bodies and the failure exits are missing (see the NOTE comments
 * inside), so the documentation covers only what is still present.
 *
 * @param iter    iterator over the label to convert
 * @param options option bits, forwarded to transform.prepare and convertToASCII
 * @return the punycode-decoded label when the visible steps reach step 8,
 *         otherwise a new buffer holding iter.getText()
 * @throws StringPrepParseException declared; convertToASCII in step 6 is called
 *         outside any visible try block
 */
public static StringBuffer convertToUnicode(UCharacterIterator iter, int options)
throws StringPrepParseException{
// the source contains all ascii codepoints
boolean srcIsASCII = true;
int ch;
// NOTE(review): saveIndex is not read anywhere in the visible code; presumably
// it was used to reset the iterator after the scan in step 1 — TODO confirm.
int saveIndex = iter.getIndex();
// step 1: find out if all the codepoints in src are ASCII
// NOTE(review): the right-hand side of the assignment to ch is missing;
// presumably it read the next code unit from iter — TODO restore.
while ((ch = != UCharacterIterator.DONE) {
if (ch > 0x7F) {
srcIsASCII = false;
// The RFC states that
// <quote>
// ToUnicode never fails. If any step fails, then the original input
// is returned immediately in that step.
// </quote>
// single-pass do/while (terminated by "while (false)" below)
do {
StringBuffer processOut;
if (srcIsASCII == false) {
// step 2: process the string
try {
processOut = transform.prepare(iter, options);
// NOTE(review): the catch body is missing; presumably it left the do/while
// so that the original input is returned, as the RFC quote above requires.
} catch (StringPrepParseException e) {
} else {
// just point to source
processOut = new StringBuffer(iter.getText());
// step 3: verify ACE Prefix
if (startsWithPrefix(processOut)) {
// step 4: Remove the ACE Prefix
String temp = processOut.substring(ACE_PREFIX_LENGTH, processOut.length());
// step 5: Decode using punycode
StringBuffer decodeOut = null;
try {
decodeOut = PunycodeReference.decode(new StringBuffer(temp), null);
// NOTE(review): catch body missing here as well — TODO restore.
} catch (StringPrepParseException e) {
// step 6:Apply toASCII
StringBuffer toASCIIOut = convertToASCII(decodeOut, options);
// step 7: verify
// NOTE(review): the body of this mismatch branch is missing, so the return
// in step 8 appears to sit inside it; presumably the branch exited the
// do/while instead — TODO confirm.
if (compareCaseInsensitiveASCII(processOut, toASCIIOut) != 0) {
// step 8: return output of step 5
return decodeOut;
} while (false);
// fallback: hand back the unmodified input text
return new StringBuffer(iter.getText());
public static StringBuffer convertIDNToUnicode(UCharacterIterator iter, int options)
throws StringPrepParseException{
return convertIDNToUnicode(iter.getText(), options);
public static StringBuffer convertIDNToUnicode(StringBuffer str, int options)
throws StringPrepParseException{
return convertIDNToUnicode(str.toString(), options);
/**
 * Converts a domain name to Unicode by cutting it into labels at the indices
 * reported by getSeparatorIndex.
 *
 * NOTE(review): this copy is incomplete. The loop over the labels, the call
 * that converts each label and appends it to result, and the closing braces
 * are missing, so the visible code handles at most the first label and never
 * writes to result — TODO restore from upstream.
 *
 * @param src     the domain name to convert
 * @param options option bits (not used by any statement visible here)
 * @return the result buffer
 * @throws StringPrepParseException when a label is empty but the end of the
 *         input has not been reached (ZERO_LENGTH_LABEL)
 */
public static StringBuffer convertIDNToUnicode(String src, int options)
throws StringPrepParseException{
char[] srcArr = src.toCharArray();
StringBuffer result = new StringBuffer();
// sepIndex: end (exclusive) of the current label; oldSepIndex: its start
int sepIndex=0;
int oldSepIndex=0;
sepIndex = getSeparatorIndex(srcArr,sepIndex,srcArr.length);
String label = new String(srcArr,oldSepIndex,sepIndex-oldSepIndex);
// an empty label is rejected unless it is the one at the very end of the input
if(label.length()==0 && sepIndex!=srcArr.length ){
throw new StringPrepParseException("Found zero length lable after NamePrep.",StringPrepParseException.ZERO_LENGTH_LABEL);
// NOTE(review): iter is created but not used in the visible code; presumably
// it was passed to convertToUnicode and the output appended to result.
UCharacterIterator iter = UCharacterIterator.getInstance(label);
// increment the sepIndex to skip past the separator
// NOTE(review): the increment the comment above describes is not present.
oldSepIndex = sepIndex;
return result;
// TODO: optimize
public static int compare(StringBuffer s1, StringBuffer s2, int options)
throws StringPrepParseException{
if(s1==null || s2 == null){
throw new IllegalArgumentException("One of the source buffers is null");
StringBuffer s1Out = convertIDNToASCII(s1.toString(), options);
StringBuffer s2Out = convertIDNToASCII(s2.toString(), options);
return compareCaseInsensitiveASCII(s1Out,s2Out);
// TODO: optimize
public static int compare(String s1, String s2, int options)
throws StringPrepParseException{
if(s1==null || s2 == null){
throw new IllegalArgumentException("One of the source buffers is null");
StringBuffer s1Out = convertIDNToASCII(s1, options);
StringBuffer s2Out = convertIDNToASCII(s2, options);
return compareCaseInsensitiveASCII(s1Out,s2Out);
// TODO: optimize
public static int compare(UCharacterIterator i1, UCharacterIterator i2, int options)
throws StringPrepParseException{
if(i1==null || i2 == null){
throw new IllegalArgumentException("One of the source buffers is null");
StringBuffer s1Out = convertIDNToASCII(i1.getText(), options);
StringBuffer s2Out = convertIDNToASCII(i2.getText(), options);
return compareCaseInsensitiveASCII(s1Out,s2Out);