blob: fc302f1e80f3c917cd148a6d89ac727f0532d3e6 [file] [log] [blame]
/*
* Copyright (C) 2007 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package android.syncml.pim;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
/**
* Abstract base class for parsers of V-format files, such as vCard and vCalendar.
*/
@Deprecated
abstract public class VParser {
// Charset used by parse(InputStream, VBuilder) when the caller does not name one.
// Assume that "iso-8859-1" is able to map "all" 8bit characters to some unicode and
// decode the unicode to the original charset. If not, this setting will cause some bug.
// NOTE(review): this field is public, static and not final, so any code can reassign it.
public static String DEFAULT_CHARSET = "iso-8859-1";
/**
* The whole input as one string. It is filled by setInputStream(), which
* removes line folding while copying the stream.
*/
protected String mBuffer = null;
/** The builder that receives parse events; assigned in parse() and may be null. */
protected VBuilder mBuilder = null;
/**
* The value encoding of the property being parsed. It is assigned by
* parsePEncodingVal() and read by parseValue(); null is handled like 8BIT there.
*/
protected String mEncoding = null;
/** Sentinel returned by the parseXxx() methods when nothing could be consumed. */
protected final int PARSE_ERROR = -1;
/** Not referenced in this class; presumably the encoding subclasses fall back to - confirm. */
protected final String mDefaultEncoding = "8BIT";
/**
 * Checks whether a CRLF pair starts at the given position.
 *
 * @param offset
 *            The index in the buffer to examine.
 * @return 2 (the length of CRLF) when the buffer holds CR LF at
 *         {@code offset}; otherwise PARSE_ERROR. PARSE_ERROR is also
 *         returned when {@code offset} lies outside the buffer or when a
 *         lone CR is the last character of the buffer.
 */
protected int parseCrlf(int offset) {
    // startsWith() is bounds-safe: it answers false for a negative offset,
    // an offset past the end, or a CR that is the final character, where
    // reading the following character by index would throw.
    if (mBuffer.startsWith("\r\n", offset)) {
        return 2;
    }
    return PARSE_ERROR;
}
/**
 * Parses the given stream.
 * <p>
 * The stream is first copied into the internal buffer by
 * {@link #setInputStream(InputStream, String)}; {@link #parseVFile(int)}
 * is then called repeatedly, each call starting where the previous one
 * stopped, until it reports PARSE_ERROR or makes no progress.
 *
 * @param is
 *            The source to parse.
 * @param encoding
 *            The charset name used to decode the stream.
 * @param builder
 *            The v builder which is used to construct data; may be null.
 * @return true when the whole buffer was consumed, otherwise false. false
 *         is also returned when the stream could not be buffered at all.
 * @throws IOException
 */
public boolean parse(InputStream is, String encoding, VBuilder builder)
        throws IOException {
    setInputStream(is, encoding);
    mBuilder = builder;
    if (mBuffer == null) {
        // setInputStream() swallows read failures and may leave no buffer;
        // report failure instead of hitting a NullPointerException below.
        return false;
    }
    if (mBuilder != null) {
        mBuilder.start();
    }
    int consumed = 0;
    for (;;) {
        final int ret = parseVFile(consumed); // length of the next entity
        if (ret == PARSE_ERROR || ret <= 0) {
            // A result of 0 would repeat the same call forever and a
            // negative one would move backwards, so both end the parse.
            break;
        }
        consumed += ret;
    }
    if (mBuilder != null) {
        mBuilder.end();
    }
    return mBuffer.length() == consumed;
}
/**
 * Parses the given stream, decoding it with {@link #DEFAULT_CHARSET}.
 * This is a convenience overload of
 * {@link #parse(InputStream, String, VBuilder)}.
 *
 * @param is
 *            The source to parse.
 * @param builder
 *            The v builder which is used to construct data.
 * @return The result of the three-argument overload: true for success,
 *         otherwise false.
 * @throws IOException
 */
public boolean parse(InputStream is, VBuilder builder) throws IOException {
    final boolean parsed = this.parse(is, DEFAULT_CHARSET, builder);
    return parsed;
}
/**
 * Copies the content of the input stream into {@code mBuffer} and removes
 * line "folding": a CRLF that is immediately followed by a space or a tab
 * is dropped, while the space or tab itself is kept.
 * <p>
 * If reading fails, {@code mBuffer} is left {@code null} so that content
 * from an earlier call can never be parsed by mistake. The failure itself
 * cannot be propagated because this method only declares
 * {@link UnsupportedEncodingException}.
 *
 * @param is
 *            The stream to read. It is not closed by this method.
 * @param encoding
 *            The charset name used to decode the stream; {@code null}
 *            selects {@link #DEFAULT_CHARSET}.
 * @throws UnsupportedEncodingException
 *             If the named charset is not supported.
 */
protected void setInputStream(InputStream is, String encoding)
        throws UnsupportedEncodingException {
    final String charsetName = (encoding == null) ? DEFAULT_CHARSET : encoding;
    InputStreamReader reader = new InputStreamReader(is, charsetName);
    StringBuilder b = new StringBuilder();
    // Drop whatever an earlier call stored; it must not survive a failure.
    mBuffer = null;
    try {
        int ch = reader.read();
        while (ch != -1) {
            if (ch != '\r') {
                b.append((char) ch);
                ch = reader.read();
                continue;
            }
            final int afterCr = reader.read();
            if (afterCr != '\n') {
                // Lone CR: keep it and re-examine the lookahead, which may
                // be end-of-stream or the CR of a real CRLF.
                b.append('\r');
                ch = afterCr;
                continue;
            }
            final int afterCrlf = reader.read();
            if (afterCrlf == ' ' || afterCrlf == '\t') {
                // Folded line: drop the CRLF, keep the whitespace.
                b.append((char) afterCrlf);
                ch = reader.read();
            } else {
                b.append("\r\n");
                // Re-examine the lookahead instead of appending it blindly,
                // so that end-of-stream and a following CR are handled.
                ch = afterCrlf;
            }
        }
        mBuffer = b.toString();
    } catch (IOException e) {
        // Cannot be rethrown through this signature; mBuffer stays null.
        return;
    }
}
/**
 * Parses one entity of the V file, starting at {@code offset} in the
 * buffer. Subclasses must implement this method.<br>
 * parse() calls it in a loop, adds each returned length to the offset of
 * the next call, and stops when PARSE_ERROR is returned.
 *
 * @param offset
 *            The index in the buffer at which to start.
 * @return The number of characters consumed, or PARSE_ERROR.
 */
abstract protected int parseVFile(int offset);
/**
 * Skips a run of white space and line breaks.<br>
 * Grammar: 1 * (SPACE / HTAB / CRLF)
 *
 * @param offset
 *            The index in the buffer at which the run should start.
 * @return The length of the run, or PARSE_ERROR when not even one space,
 *         tab or CRLF is found at {@code offset}.
 */
protected int parseWsls(int offset) {
    int skipped = 0;
    try {
        while (true) {
            final int pos = offset + skipped;
            final char current = mBuffer.charAt(pos);
            final int step;
            if (current == ' ' || current == '\t') {
                step = 1;
            } else {
                step = parseCrlf(pos);
                if (step == PARSE_ERROR) {
                    break;
                }
            }
            skipped += step;
        }
    } catch (IndexOutOfBoundsException ignored) {
        // The run reached the end of the buffer; keep what was counted.
    }
    return (skipped > 0) ? skipped : PARSE_ERROR;
}
/**
 * Tests whether the buffer content at {@code offset} starts with the
 * given string.
 *
 * @param offset
 *            The index in the buffer at which the comparison starts.
 * @param tar
 *            The string to look for.
 * @param ignoreCase
 *            true to compare without regard to case, false for an exact
 *            comparison.
 * @return The length of {@code tar} when it matches; PARSE_ERROR when it
 *         does not match, when {@code tar} is null, or when the compared
 *         region does not fit inside the buffer.
 */
protected int parseString(int offset, final String tar, boolean ignoreCase) {
    if (tar == null) {
        return PARSE_ERROR;
    }
    final int wanted = tar.length();
    // regionMatches() answers false, rather than throwing, when the region
    // falls outside the buffer.
    final boolean matches = mBuffer.regionMatches(ignoreCase, offset, tar, 0, wanted);
    return matches ? wanted : PARSE_ERROR;
}
/**
 * Counts the spaces and tabs that start at the given position.
 *
 * @param offset
 *            The index in the buffer at which to start.
 * @return The number of consecutive space or tab characters (possibly 0),
 *         or PARSE_ERROR when {@code offset} lies outside the buffer.
 */
protected int removeWs(int offset) {
    final int length = mBuffer.length();
    if (offset < 0 || offset >= length) {
        return PARSE_ERROR;
    }
    int end = offset;
    // Bound the scan by the buffer length: white space may run up to the
    // very last character of the buffer.
    while (end < length) {
        final char ch = mBuffer.charAt(end);
        if (ch != ' ' && ch != '\t') {
            break;
        }
        end++;
    }
    return end - offset;
}
/**
 * Parses an extension name: the prefix "X-" (in any case) followed by a
 * word as accepted by parseWord().
 *
 * @param offset
 *            The index in the buffer at which to start.
 * @return The combined length of the prefix and the word, or PARSE_ERROR
 *         when either part is missing.
 */
protected int parseXWord(int offset) {
    final int prefixLen = parseString(offset, "X-", true);
    if (prefixLen == PARSE_ERROR) {
        return PARSE_ERROR;
    }
    final int wordLen = parseWord(offset + prefixLen);
    if (wordLen == PARSE_ERROR) {
        return PARSE_ERROR;
    }
    return prefixLen + wordLen;
}
/**
 * Parses a property value according to the current {@code mEncoding}:
 * <ul>
 * <li>null, 7BIT, 8BIT or any "X-" encoding: parse8bit()</li>
 * <li>QUOTED-PRINTABLE: parseQuotedPrintable()</li>
 * <li>BASE64: parseBase64()</li>
 * </ul>
 * Encoding names are compared without regard to case.
 *
 * @param offset
 *            The index in the buffer at which the value starts.
 * @return The result of the selected parser, or PARSE_ERROR when the
 *         encoding is none of the above.
 */
protected int parseValue(int offset) {
    final String encoding = mEncoding;
    final boolean plainText = encoding == null
            || encoding.equalsIgnoreCase("7BIT")
            || encoding.equalsIgnoreCase("8BIT")
            || encoding.toUpperCase().startsWith("X-");
    if (plainText) {
        return parse8bit(offset);
    }
    if (encoding.equalsIgnoreCase("QUOTED-PRINTABLE")) {
        return parseQuotedPrintable(offset);
    }
    if (encoding.equalsIgnoreCase("BASE64")) {
        return parseBase64(offset);
    }
    return PARSE_ERROR;
}
/**
 * Refer to RFC 1521, 8bit text. The value extends up to, but does not
 * include, the next CRLF.
 *
 * @param offset
 *            The index in the buffer at which the value starts.
 * @return The number of characters before the next CRLF (possibly 0), or
 *         PARSE_ERROR when no CRLF follows or {@code offset} lies outside
 *         the buffer.
 */
protected int parse8bit(int offset) {
    if (offset < 0 || offset > mBuffer.length()) {
        return PARSE_ERROR;
    }
    // Search in place instead of copying the rest of the buffer first.
    final int crlf = mBuffer.indexOf("\r\n", offset);
    if (crlf == -1) {
        return PARSE_ERROR;
    }
    return crlf - offset;
}
/**
 * Refer to RFC 1521, quoted printable text ([*(ptext / SPACE / TAB) ptext]
 * ["="] CRLF). The CRLF itself is not consumed.
 *
 * @param offset
 *            The index in the buffer at which the value starts.
 * @return The number of characters consumed (possibly 0), or PARSE_ERROR
 *         when {@code offset} does not lie inside the buffer.
 */
protected int parseQuotedPrintable(int offset) {
    int sum = 0;
    int ret = removeWs(offset);
    if (ret == PARSE_ERROR) {
        // Nothing to read at this position.
        return PARSE_ERROR;
    }
    offset += ret;
    sum += ret;
    for (;;) {
        ret = parsePtext(offset);
        if (PARSE_ERROR == ret) {
            break;
        }
        offset += ret;
        sum += ret;
        ret = removeWs(offset);
        if (PARSE_ERROR == ret) {
            // The end of the buffer was reached. Adding the error code to
            // the offset would step back one character and repeat the same
            // match without end.
            break;
        }
        offset += ret;
        sum += ret;
    }
    // Optional trailing "=" of a soft line break.
    ret = parseString(offset, "=", false);
    if (ret != PARSE_ERROR) {
        sum += ret;
    }
    return sum;
}
/**
 * Parses one ptext unit: either a single printable ASCII character other
 * than "=", SPACE and TAB, or a sequence accepted by parseOctet().
 *
 * @param offset
 *            The index in the buffer at which to start.
 * @return 1 for a plain character, the parseOctet() result otherwise, or
 *         PARSE_ERROR when neither form is found or {@code offset} lies
 *         outside the buffer.
 */
protected int parsePtext(int offset) {
    if (offset < 0 || offset >= mBuffer.length()) {
        return PARSE_ERROR;
    }
    final char current = mBuffer.charAt(offset);
    final boolean reserved = current == '=' || current == ' ' || current == '\t';
    if (isPrintable(current) && !reserved) {
        return 1;
    }
    return parseOctet(offset);
}
/**
 * Parses a sequence that starts with "=": either "=" followed by two of
 * (DIGIT / "A" / "B" / "C" / "D" / "E" / "F"), or "=" followed by a single
 * SPACE or TAB. Lower-case hex letters are not accepted.
 *
 * @param offset
 *            The index in the buffer at which to start.
 * @return 3 for "=" plus two hex digits, 2 for "=" plus a space or tab,
 *         otherwise PARSE_ERROR.
 */
protected int parseOctet(int offset) {
    final int equalsLen = parseString(offset, "=", false);
    if (equalsLen == PARSE_ERROR) {
        return PARSE_ERROR;
    }
    final int length = mBuffer.length();
    final int firstPos = offset + equalsLen;
    if (firstPos < 0 || firstPos >= length) {
        return PARSE_ERROR;
    }
    final char first = mBuffer.charAt(firstPos);
    if (first == ' ' || first == '\t') {
        return equalsLen + 1;
    }
    final boolean firstIsHex =
            (first >= '0' && first <= '9') || (first >= 'A' && first <= 'F');
    if (!firstIsHex || firstPos + 1 >= length) {
        return PARSE_ERROR;
    }
    final char second = mBuffer.charAt(firstPos + 1);
    final boolean secondIsHex =
            (second >= '0' && second <= '9') || (second >= 'A' && second <= 'F');
    return secondIsHex ? equalsLen + 2 : PARSE_ERROR;
}
/**
 * Refer to RFC 1521, base64 text. The end of the text is marked with two
 * CRLF sequences. Characters before the first CR are not validated.
 *
 * @param offset
 *            The index in the buffer at which the value starts.
 * @return The length of the text plus the first of the two CRLFs; the
 *         second CRLF is left to terminate the property. PARSE_ERROR is
 *         returned when no CR follows, when the first CR does not start
 *         two consecutive CRLFs, or when {@code offset} lies outside the
 *         buffer.
 */
protected int parseBase64(int offset) {
    if (offset < 0) {
        return PARSE_ERROR;
    }
    final int firstCr = mBuffer.indexOf('\r', offset);
    if (firstCr == -1) {
        return PARSE_ERROR;
    }
    final int terminator = parseString(firstCr, "\r\n\r\n", false);
    if (terminator == PARSE_ERROR) {
        // Report the failure explicitly; folding the error code into the
        // length would produce a meaningless offset for the caller.
        return PARSE_ERROR;
    }
    /* leave one CRLF to parse the end of this property */
    return (firstCr - offset) + terminator - 2;
}
/**
 * Parses a word: a run of printable ASCII characters that stops before
 * SPACE, "=", ":", ".", "," or ";". A backslash directly followed by ";"
 * is taken as a pair, so an escaped semicolon does not end the word. A
 * backslash that is the last character of the buffer is not counted.
 *
 * @param offset
 *            The index in the buffer at which to start.
 * @return The length of the word, or PARSE_ERROR when it is empty.
 */
protected int parseWord(int offset) {
    final int length = mBuffer.length();
    int pos = offset;
    while (pos >= 0 && pos < length) {
        final char current = mBuffer.charAt(pos);
        if (!isPrintable(current)) {
            break;
        }
        final boolean delimiter = current == ' ' || current == '='
                || current == ':' || current == '.'
                || current == ',' || current == ';';
        if (delimiter) {
            break;
        }
        if (current == '\\') {
            if (pos + 1 >= length) {
                // No character follows the backslash; stop before it.
                break;
            }
            if (mBuffer.charAt(pos + 1) == ';') {
                pos++;
            }
        }
        pos++;
    }
    final int consumed = pos - offset;
    return (consumed == 0) ? PARSE_ERROR : consumed;
}
/**
 * Tells whether the character is an ASCII letter (a-z, A-Z) or an ASCII
 * digit (0-9).
 */
protected boolean isLetterOrDigit(char ch) {
    final boolean digit = ch >= '0' && ch <= '9';
    final boolean lower = ch >= 'a' && ch <= 'z';
    final boolean upper = ch >= 'A' && ch <= 'Z';
    return digit || lower || upper;
}
/**
 * Tells whether the character is printable ASCII, that is, in the range
 * from SPACE to "~" inclusive.
 */
protected boolean isPrintable(char ch) {
    return ' ' <= ch && ch <= '~';
}
/**
 * Tells whether the character is an ASCII letter (a-z or A-Z).
 */
protected boolean isLetter(char ch) {
    final boolean lower = 'a' <= ch && ch <= 'z';
    final boolean upper = 'A' <= ch && ch <= 'Z';
    return lower || upper;
}
/**
 * Returns the run of ASCII letters, digits and "-" that starts at the
 * given position.
 *
 * @param offset
 *            The index in the buffer at which to start.
 * @return The run, or an empty string when the character at
 *         {@code offset} does not qualify or {@code offset} lies outside
 *         the buffer.
 */
protected String getWord(int offset) {
    final int length = mBuffer.length();
    int end = offset;
    while (end >= 0 && end < length) {
        final char current = mBuffer.charAt(end);
        if (!isLetterOrDigit(current) && current != '-') {
            break;
        }
        end++;
    }
    if (end <= offset) {
        return "";
    }
    return mBuffer.substring(offset, end);
}
/**
 * Parses the value of a VALUE parameter: "INLINE" / "URL" / "CONTENT-ID" /
 * "CID" / "X-" word. Keywords are matched without regard to case and in
 * the order listed.
 *
 * @param offset
 *            The index in the buffer at which to start.
 * @return The length of the matched keyword or X- word, or PARSE_ERROR.
 */
protected int parsePValueVal(int offset) {
    // Each keyword is tried once; a second "INLINE" attempt could never
    // succeed after the first one failed.
    final String[] keywords = { "INLINE", "URL", "CONTENT-ID", "CID" };
    for (int i = 0; i < keywords.length; i++) {
        final int ret = parseString(offset, keywords[i], true);
        if (ret != PARSE_ERROR) {
            return ret;
        }
    }
    return parseXWord(offset);
}
/**
 * Parses the value of an ENCODING parameter: "7BIT" / "8BIT" /
 * "QUOTED-PRINTABLE" / "BASE64" / "X-" word, and stores it in
 * {@code mEncoding}. The standard names are matched without regard to
 * case and stored in upper case; an X- word is stored as it appears in
 * the buffer.
 *
 * @param offset
 *            The index in the buffer at which to start.
 * @return The length of the matched name, or PARSE_ERROR, in which case
 *         {@code mEncoding} is left unchanged.
 */
protected int parsePEncodingVal(int offset) {
    final String[] encodings = { "7BIT", "8BIT", "QUOTED-PRINTABLE", "BASE64" };
    for (int i = 0; i < encodings.length; i++) {
        final int ret = parseString(offset, encodings[i], true);
        if (ret != PARSE_ERROR) {
            mEncoding = encodings[i];
            return ret;
        }
    }
    final int xWordLen = parseXWord(offset);
    if (xWordLen != PARSE_ERROR) {
        // Take the word straight out of the buffer rather than copying the
        // whole remainder of the buffer first.
        mEncoding = mBuffer.substring(offset, offset + xWordLen);
        return xWordLen;
    }
    return PARSE_ERROR;
}
/**
 * Refer to RFC1521, section 7.1<br>
 * Parses the value of a CHARSET parameter: "us-ascii" / "iso-8859-1" to
 * "iso-8859-9" / "X-" word. Names are matched without regard to case and
 * in the order listed.
 *
 * @param offset
 *            The index in the buffer at which to start.
 * @return The length of the matched name or X- word, or PARSE_ERROR.
 */
protected int parseCharsetVal(int offset) {
    final String[] charsets = {
        "us-ascii",
        "iso-8859-1",
        "iso-8859-2",
        "iso-8859-3",
        "iso-8859-4",
        "iso-8859-5",
        "iso-8859-6",
        "iso-8859-7",
        "iso-8859-8",
        "iso-8859-9",
    };
    for (int i = 0; i < charsets.length; i++) {
        final int matched = parseString(offset, charsets[i], true);
        if (matched != PARSE_ERROR) {
            return matched;
        }
    }
    final int xWordLen = parseXWord(offset);
    if (xWordLen != PARSE_ERROR) {
        return xWordLen;
    }
    return PARSE_ERROR;
}
/**
 * Refer to RFC 1766<br>
 * Parses a language value: a tag followed by any number of "-" tag pairs,
 * where a tag is what parseTag() accepts.<br>
 * Note that a "-" which is not followed by a tag is still counted in the
 * returned length.
 *
 * @param offset
 *            The index in the buffer at which to start.
 * @return The number of characters consumed, or PARSE_ERROR when no tag
 *         starts at {@code offset}.
 */
protected int parseLangVal(int offset) {
    int total = parseTag(offset);
    if (total == PARSE_ERROR) {
        return PARSE_ERROR;
    }
    while (true) {
        final int dashLen = parseString(offset + total, "-", false);
        if (dashLen == PARSE_ERROR) {
            break;
        }
        total += dashLen;
        final int tagLen = parseTag(offset + total);
        if (tagLen == PARSE_ERROR) {
            break;
        }
        total += tagLen;
    }
    return total;
}
/**
 * Parses a tag: a run of 1 to 8 ASCII letters. Letters beyond the eighth
 * are left unread.
 *
 * @param offset
 *            The index in the buffer at which to start.
 * @return The number of letters read (1 to 8), or PARSE_ERROR when the
 *         character at {@code offset} is not a letter or {@code offset}
 *         lies outside the buffer.
 */
protected int parseTag(int offset) {
    final int length = mBuffer.length();
    int letters = 0;
    while (letters < 8) {
        final int pos = offset + letters;
        if (pos < 0 || pos >= length) {
            break;
        }
        if (!isLetter(mBuffer.charAt(pos))) {
            break;
        }
        letters++;
    }
    return (letters == 0) ? PARSE_ERROR : letters;
}
}