blob: 38807ed5a4860df367fc1a0e1a20a9c88333a4ab [file] [log] [blame]
/*
* Copyright (c) 1997, 2006, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package com.sun.activation.registries;
/**
* A tokenizer for strings in the form of "foo/bar; prop1=val1; ... ".
* Useful for parsing MIME content types.
*/
public class MailcapTokenizer {
public static final int UNKNOWN_TOKEN = 0;
public static final int START_TOKEN = 1;
public static final int STRING_TOKEN = 2;
public static final int EOI_TOKEN = 5;
public static final int SLASH_TOKEN = '/';
public static final int SEMICOLON_TOKEN = ';';
public static final int EQUALS_TOKEN = '=';
/**
* Constructor
*
* @parameter inputString the string to tokenize
*/
public MailcapTokenizer(String inputString) {
data = inputString;
dataIndex = 0;
dataLength = inputString.length();
currentToken = START_TOKEN;
currentTokenValue = "";
isAutoquoting = false;
autoquoteChar = ';';
}
/**
* Set whether auto-quoting is on or off.
*
* Auto-quoting means that all characters after the first
* non-whitespace, non-control character up to the auto-quote
* terminator character or EOI (minus any whitespace immediatley
* preceeding it) is considered a token.
*
* This is required for handling command strings in a mailcap entry.
*/
public void setIsAutoquoting(boolean value) {
isAutoquoting = value;
}
/**
* Retrieve current token.
*
* @returns The current token value
*/
public int getCurrentToken() {
return currentToken;
}
/*
* Get a String that describes the given token.
*/
public static String nameForToken(int token) {
String name = "really unknown";
switch(token) {
case UNKNOWN_TOKEN:
name = "unknown";
break;
case START_TOKEN:
name = "start";
break;
case STRING_TOKEN:
name = "string";
break;
case EOI_TOKEN:
name = "EOI";
break;
case SLASH_TOKEN:
name = "'/'";
break;
case SEMICOLON_TOKEN:
name = "';'";
break;
case EQUALS_TOKEN:
name = "'='";
break;
}
return name;
}
/*
* Retrieve current token value.
*
* @returns A String containing the current token value
*/
public String getCurrentTokenValue() {
return currentTokenValue;
}
/*
* Process the next token.
*
* @returns the next token
*/
public int nextToken() {
if (dataIndex < dataLength) {
// skip white space
while ((dataIndex < dataLength) &&
(isWhiteSpaceChar(data.charAt(dataIndex)))) {
++dataIndex;
}
if (dataIndex < dataLength) {
// examine the current character and see what kind of token we have
char c = data.charAt(dataIndex);
if (isAutoquoting) {
if (c == ';' || c == '=') {
currentToken = c;
currentTokenValue = new Character(c).toString();
++dataIndex;
} else {
processAutoquoteToken();
}
} else {
if (isStringTokenChar(c)) {
processStringToken();
} else if ((c == '/') || (c == ';') || (c == '=')) {
currentToken = c;
currentTokenValue = new Character(c).toString();
++dataIndex;
} else {
currentToken = UNKNOWN_TOKEN;
currentTokenValue = new Character(c).toString();
++dataIndex;
}
}
} else {
currentToken = EOI_TOKEN;
currentTokenValue = null;
}
} else {
currentToken = EOI_TOKEN;
currentTokenValue = null;
}
return currentToken;
}
private void processStringToken() {
// capture the initial index
int initialIndex = dataIndex;
// skip to 1st non string token character
while ((dataIndex < dataLength) &&
isStringTokenChar(data.charAt(dataIndex))) {
++dataIndex;
}
currentToken = STRING_TOKEN;
currentTokenValue = data.substring(initialIndex, dataIndex);
}
private void processAutoquoteToken() {
// capture the initial index
int initialIndex = dataIndex;
// now skip to the 1st non-escaped autoquote termination character
// XXX - doesn't actually consider escaping
boolean foundTerminator = false;
while ((dataIndex < dataLength) && !foundTerminator) {
char c = data.charAt(dataIndex);
if (c != autoquoteChar) {
++dataIndex;
} else {
foundTerminator = true;
}
}
currentToken = STRING_TOKEN;
currentTokenValue =
fixEscapeSequences(data.substring(initialIndex, dataIndex));
}
private static boolean isSpecialChar(char c) {
boolean lAnswer = false;
switch(c) {
case '(':
case ')':
case '<':
case '>':
case '@':
case ',':
case ';':
case ':':
case '\\':
case '"':
case '/':
case '[':
case ']':
case '?':
case '=':
lAnswer = true;
break;
}
return lAnswer;
}
private static boolean isControlChar(char c) {
return Character.isISOControl(c);
}
private static boolean isWhiteSpaceChar(char c) {
return Character.isWhitespace(c);
}
private static boolean isStringTokenChar(char c) {
return !isSpecialChar(c) && !isControlChar(c) && !isWhiteSpaceChar(c);
}
private static String fixEscapeSequences(String inputString) {
int inputLength = inputString.length();
StringBuffer buffer = new StringBuffer();
buffer.ensureCapacity(inputLength);
for (int i = 0; i < inputLength; ++i) {
char currentChar = inputString.charAt(i);
if (currentChar != '\\') {
buffer.append(currentChar);
} else {
if (i < inputLength - 1) {
char nextChar = inputString.charAt(i + 1);
buffer.append(nextChar);
// force a skip over the next character too
++i;
} else {
buffer.append(currentChar);
}
}
}
return buffer.toString();
}
private String data;
private int dataIndex;
private int dataLength;
private int currentToken;
private String currentTokenValue;
private boolean isAutoquoting;
private char autoquoteChar;
/*
public static void main(String[] args) {
for (int i = 0; i < args.length; ++i) {
MailcapTokenizer tokenizer = new MailcapTokenizer(args[i]);
System.out.println("Original: |" + args[i] + "|");
int currentToken = tokenizer.nextToken();
while (currentToken != EOI_TOKEN) {
switch(currentToken) {
case UNKNOWN_TOKEN:
System.out.println(" Unknown Token: |" + tokenizer.getCurrentTokenValue() + "|");
break;
case START_TOKEN:
System.out.println(" Start Token: |" + tokenizer.getCurrentTokenValue() + "|");
break;
case STRING_TOKEN:
System.out.println(" String Token: |" + tokenizer.getCurrentTokenValue() + "|");
break;
case EOI_TOKEN:
System.out.println(" EOI Token: |" + tokenizer.getCurrentTokenValue() + "|");
break;
case SLASH_TOKEN:
System.out.println(" Slash Token: |" + tokenizer.getCurrentTokenValue() + "|");
break;
case SEMICOLON_TOKEN:
System.out.println(" Semicolon Token: |" + tokenizer.getCurrentTokenValue() + "|");
break;
case EQUALS_TOKEN:
System.out.println(" Equals Token: |" + tokenizer.getCurrentTokenValue() + "|");
break;
default:
System.out.println(" Really Unknown Token: |" + tokenizer.getCurrentTokenValue() + "|");
break;
}
currentToken = tokenizer.nextToken();
}
System.out.println("");
}
}
*/
}