/*
 * Copyright 2005-2006 Sun Microsystems, Inc. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Sun designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
package com.sun.xml.internal.bind.api.impl;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;

/**
 * Methods that convert strings into various formats.
 *
 * <p>
 * What JAX-RPC name binding tells us is that even such basic method
 * like "isLetter" can be different depending on the situation.
 *
 * For this reason, a whole lot of methods are made non-static,
 * even though they look like they should be static.
 *
 * <p>
 * Every case conversion done on whole strings uses {@link Locale#ENGLISH},
 * so the produced names do not depend on the default locale of the JVM.
 */
class NameUtil {

    /**
     * Tells whether the character is treated as punctuation, i.e. as a
     * separator that is dropped between words.
     *
     * <p>
     * The recognized characters are '-', '.', ':', '_' and the code points
     * U+00B7, U+0387, U+06DD and U+06DE.
     */
    protected boolean isPunct(char c) {
        return c == '-' || c == '.' || c == ':' || c == '_'
            || c == '\u00b7' || c == '\u0387' || c == '\u06dd' || c == '\u06de';
    }

    /** Tells whether the character is an ASCII digit or a digit per {@link Character#isDigit(char)}. */
    protected static boolean isDigit(char c) {
        return c >= '0' && c <= '9' || Character.isDigit(c);
    }

    /** Tells whether the character is an ASCII upper case letter or upper case per {@link Character#isUpperCase(char)}. */
    protected static boolean isUpper(char c) {
        return c >= 'A' && c <= 'Z' || Character.isUpperCase(c);
    }

    /** Tells whether the character is an ASCII lower case letter or lower case per {@link Character#isLowerCase(char)}. */
    protected static boolean isLower(char c) {
        return c >= 'a' && c <= 'z' || Character.isLowerCase(c);
    }

    /** Tells whether the character is an ASCII letter or a letter per {@link Character#isLetter(char)}. */
    protected boolean isLetter(char c) {
        return c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' || Character.isLetter(c);
    }

    /**
     * Capitalizes a word that starts with a lower case character.
     *
     * <p>
     * If the first character is lower case, it is converted to upper case
     * and all the remaining characters are converted to lower case.
     * Otherwise (including the empty string) the argument is returned
     * unchanged.
     *
     * @param s
     *      the word to capitalize. Must not be null.
     * @return
     *      the capitalized word.
     */
    public String capitalize(String s) {
        if (s.length() == 0 || !isLower(s.charAt(0))) {
            return s;
        }
        StringBuilder sb = new StringBuilder(s.length());
        sb.append(Character.toUpperCase(s.charAt(0)));
        // fixed locale: the default-locale variant maps 'I' to a dotless i in Turkish
        sb.append(s.substring(1).toLowerCase(Locale.ENGLISH));
        return sb.toString();
    }

    /**
     * Finds the index at which the word that begins at 'start' ends.
     *
     * <p>
     * Precondition: s[start] is not punctuation.
     *
     * @return
     *      the index of the first character of the next token,
     *      or -1 if the word runs to the end of the string.
     */
    private int nextBreak(String s, int start) {
        int n = s.length();

        char c1 = s.charAt(start);
        int t1 = classify(c1);

        for (int i = start + 1; i < n; i++) {
            // shift (c1,t1) into (c0,t0)
            // char c0 = c1;  --- conceptually, but c0 won't be used
            int t0 = t1;

            c1 = s.charAt(i);
            t1 = classify(c1);

            switch (actionTable[t0 * 5 + t1]) {
            case ACTION_CHECK_PUNCT:
                // break only if the current character is a separator
                if (isPunct(c1)) {
                    return i;
                }
                break;
            case ACTION_CHECK_C2:
                // two upper case letters in a row: break only if the character
                // after them is lower case, as in "XMLParser" -> "XML", "Parser"
                if (i < n - 1) {
                    char c2 = s.charAt(i + 1);
                    if (isLower(c2)) {
                        return i;
                    }
                }
                break;
            case ACTION_BREAK:
                return i;
            }
        }
        return -1;
    }

    // the 5-category classification that we use in this code
    // to find word breaks
    static protected final int UPPER_LETTER = 0;
    static protected final int LOWER_LETTER = 1;
    static protected final int OTHER_LETTER = 2;
    static protected final int DIGIT = 3;
    static protected final int OTHER = 4;

    /**
     * Look up table for actions.
     * type0*5+type1 would yield the action to be taken.
     */
    private static final byte[] actionTable = new byte[5 * 5];

    // action constants. see nextBreak for the meaning
    static private final byte ACTION_CHECK_PUNCT = 0;
    static private final byte ACTION_CHECK_C2 = 1;
    static private final byte ACTION_BREAK = 2;
    static private final byte ACTION_NOBREAK = 3;

    /**
     * Decide the action to be taken given
     * the classification of the preceding character 't0' and
     * the classification of the next character 't1'.
     */
    private static byte decideAction(int t0, int t1) {
        if (t0 == OTHER && t1 == OTHER) {
            return ACTION_CHECK_PUNCT;
        }
        // exactly one of the two is a digit
        if ((t0 == DIGIT) != (t1 == DIGIT)) {
            return ACTION_BREAK;
        }
        if (t0 == LOWER_LETTER && t1 != LOWER_LETTER) {
            return ACTION_BREAK;
        }
        // exactly one of the two is a letter of any kind
        if ((t0 <= OTHER_LETTER) != (t1 <= OTHER_LETTER)) {
            return ACTION_BREAK;
        }
        // exactly one of the two is a caseless letter
        if ((t0 == OTHER_LETTER) != (t1 == OTHER_LETTER)) {
            return ACTION_BREAK;
        }

        if (t0 == UPPER_LETTER && t1 == UPPER_LETTER) {
            return ACTION_CHECK_C2;
        }

        return ACTION_NOBREAK;
    }

    static {
        // initialize the action table
        for (int t0 = 0; t0 < 5; t0++) {
            for (int t1 = 0; t1 < 5; t1++) {
                actionTable[t0 * 5 + t1] = decideAction(t0, t1);
            }
        }
    }

    /**
     * Classify a character into 5 categories that determine the word break.
     *
     * @return
     *      one of {@link #UPPER_LETTER}, {@link #LOWER_LETTER},
     *      {@link #OTHER_LETTER}, {@link #DIGIT} or {@link #OTHER}.
     */
    protected int classify(char c0) {
        switch (Character.getType(c0)) {
        case Character.UPPERCASE_LETTER:        return UPPER_LETTER;
        case Character.LOWERCASE_LETTER:        return LOWER_LETTER;
        case Character.TITLECASE_LETTER:
        case Character.MODIFIER_LETTER:
        case Character.OTHER_LETTER:            return OTHER_LETTER;
        case Character.DECIMAL_DIGIT_NUMBER:    return DIGIT;
        default:                                return OTHER;
        }
    }


    /**
     * Tokenizes a string into words and capitalizes the first
     * character of each word.
     *
     * <p>
     * This method uses a change in character type as a splitter
     * of two words. For example, "abc100ghi" will be split into
     * {"Abc", "100","Ghi"}.
     *
     * <p>
     * Punctuation (see {@link #isPunct(char)}) at the start of a word is
     * skipped, and every word is escaped so that it only contains characters
     * allowed inside a Java identifier.
     *
     * @return
     *      the list of words, possibly empty. There is no guarantee that
     *      the words can be combined into a valid Java identifier.
     */
    public List<String> toWordList(String s) {
        List<String> ss = new ArrayList<String>();
        int n = s.length();
        for (int i = 0; i < n;) {

            // Skip punctuation
            while (i < n && isPunct(s.charAt(i))) {
                i++;
            }
            if (i >= n) {
                break;
            }

            // Find next break and collect word
            int b = nextBreak(s, i);
            String w = (b == -1) ? s.substring(i) : s.substring(i, b);
            ss.add(escape(capitalize(w)));
            if (b == -1) {
                break;
            }
            i = b;
        }

        // we can't guarantee a valid Java identifier anyway,
        // so there's not much point in rejecting an empty list here.
        return ss;
    }

    /**
     * Concatenates the words into one name.
     *
     * @param ss
     *      the words, typically produced by {@link #toWordList(String)}.
     * @param startUpper
     *      if false, the whole first word is converted to lower case.
     *      If true, the first word is used as is.
     * @return
     *      the concatenated name; empty if the list is empty.
     */
    protected String toMixedCaseName(List<String> ss, boolean startUpper) {
        StringBuilder sb = new StringBuilder();
        if (!ss.isEmpty()) {
            sb.append(startUpper ? ss.get(0) : ss.get(0).toLowerCase(Locale.ENGLISH));
            for (int i = 1; i < ss.size(); i++) {
                sb.append(ss.get(i));
            }
        }
        return sb.toString();
    }

    /**
     * Concatenates the words into one name, optionally capitalizing
     * every word but the first one.
     *
     * <p>
     * The given array is not modified.
     *
     * @param ss
     *      the words to concatenate.
     * @param startUpper
     *      if false, the whole first word is converted to lower case.
     *      If true, the first word is used as is.
     * @param cdrUpper
     *      if true, {@link #capitalize(String)} is applied to every word
     *      after the first one.
     * @return
     *      the concatenated name; empty if the array is empty.
     */
    protected String toMixedCaseVariableName(String[] ss,
                                             boolean startUpper,
                                             boolean cdrUpper) {
        StringBuilder sb = new StringBuilder();
        if (ss.length > 0) {
            sb.append(startUpper ? ss[0] : ss[0].toLowerCase(Locale.ENGLISH));
            for (int i = 1; i < ss.length; i++) {
                sb.append(cdrUpper ? capitalize(ss[i]) : ss[i]);
            }
        }
        return sb.toString();
    }


    /**
     * Formats a string into "THIS_KIND_OF_FORMAT_ABC_DEF".
     *
     * @return
     *      Always return a string but there's no guarantee that
     *      the generated code is a valid Java identifier.
     */
    public String toConstantName(String s) {
        return toConstantName(toWordList(s));
    }

    /**
     * Formats a list of words into "THIS_KIND_OF_FORMAT_ABC_DEF".
     *
     * @param ss
     *      the words; each one is upper-cased and they are joined with '_'.
     * @return
     *      Always return a string but there's no guarantee that
     *      the generated code is a valid Java identifier.
     */
    public String toConstantName(List<String> ss) {
        StringBuilder sb = new StringBuilder();
        if (!ss.isEmpty()) {
            sb.append(ss.get(0).toUpperCase(Locale.ENGLISH));
            for (int i = 1; i < ss.size(); i++) {
                sb.append('_');
                sb.append(ss.get(i).toUpperCase(Locale.ENGLISH));
            }
        }
        return sb.toString();
    }



    /**
     * Escapes characters in the given string that are not allowed inside
     * a Java identifier.
     *
     * <p>
     * Each such character is replaced by '_' followed by its UTF-16 code
     * unit as four lower case hexadecimal digits. All other characters
     * are copied as is. The result is appended to the given
     * StringBuilder.
     *
     * @param sb
     *      StringBuilder that receives escaped string.
     * @param s
     *      String to be escaped. <code>s.substring(start)</code>
     *      will be escaped and copied to the string builder.
     * @param start
     *      index of the first character of <code>s</code> to process.
     */
    public static void escape(StringBuilder sb, String s, int start) {
        int n = s.length();
        for (int i = start; i < n; i++) {
            char c = s.charAt(i);
            if (Character.isJavaIdentifierPart(c)) {
                sb.append(c);
            } else {
                sb.append('_');
                // left-pad the hexadecimal value to four digits
                if (c <= '\u000f') {
                    sb.append("000");
                } else if (c <= '\u00ff') {
                    sb.append("00");
                } else if (c <= '\u0fff') {
                    sb.append('0');
                }
                sb.append(Integer.toString(c, 16));
            }
        }
    }

    /**
     * Escapes characters that are unusable as Java identifiers
     * by replacing unsafe characters with safe characters.
     *
     * @return
     *      the given string itself if nothing had to be escaped.
     */
    private static String escape(String s) {
        int n = s.length();
        for (int i = 0; i < n; i++) {
            if (!Character.isJavaIdentifierPart(s.charAt(i))) {
                // everything before i is known to be safe; escape the rest
                StringBuilder sb = new StringBuilder(s.substring(0, i));
                escape(sb, s, i);
                return sb.toString();
            }
        }
        return s;
    }


    /**
     * Checks if a given string is usable as a Java identifier.
     *
     * @return
     *      false for the empty string, for the reserved words listed in
     *      this class, and for strings containing a character that is
     *      not allowed at its position in an identifier.
     */
    public static boolean isJavaIdentifier(String s) {
        if (s.length() == 0) {
            return false;
        }
        if (reservedKeywords.contains(s)) {
            return false;
        }

        if (!Character.isJavaIdentifierStart(s.charAt(0))) {
            return false;
        }

        for (int i = 1; i < s.length(); i++) {
            if (!Character.isJavaIdentifierPart(s.charAt(i))) {
                return false;
            }
        }

        return true;
    }

    /**
     * Checks if the given string is a valid Java package name.
     *
     * <p>
     * A non-empty name is valid when every '.'-separated segment satisfies
     * {@link #isJavaIdentifier(String)}; leading, trailing and doubled dots
     * are therefore rejected. The empty string is accepted.
     */
    public static boolean isJavaPackageName(String s) {
        if (s.length() == 0) {
            return true;
        }

        int start = 0;
        while (true) {
            int dot = s.indexOf('.', start);
            int end = (dot == -1) ? s.length() : dot;
            if (!isJavaIdentifier(s.substring(start, end))) {
                return false;
            }
            if (dot == -1) {
                return true;
            }
            start = dot + 1;    // skip '.'
        }
    }


    /** All reserved keywords of Java. Never modified after class initialization. */
    private static final Set<String> reservedKeywords = new HashSet<String>(Arrays.asList(
        // see http://java.sun.com/docs/books/tutorial/java/nutsandbolts/_keywords.html
        "abstract",
        "boolean",
        "break",
        "byte",
        "case",
        "catch",
        "char",
        "class",
        "const",
        "continue",
        "default",
        "do",
        "double",
        "else",
        "extends",
        "final",
        "finally",
        "float",
        "for",
        "goto",
        "if",
        "implements",
        "import",
        "instanceof",
        "int",
        "interface",
        "long",
        "native",
        "new",
        "package",
        "private",
        "protected",
        "public",
        "return",
        "short",
        "static",
        "strictfp",
        "super",
        "switch",
        "synchronized",
        "this",
        "throw",
        "throws",
        "transient",
        "try",
        "void",
        "volatile",
        "while",

        // technically these are not reserved words but they cannot be used as identifiers.
        "true",
        "false",
        "null",

        // assert is treated as a keyword as well
        "assert",

        // and 5.0 keywords
        "enum"
    ));
}