blob: 5ba7b522452a78131f970545a8521206ef2325ff [file] [log] [blame]
/*
* Parser.java February 2001
*
* Copyright (C) 2001, Niall Gallagher <niallg@users.sf.net>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package org.simpleframework.common.parse;
/**
 * The <code>Parser</code> object is a simple template for parsing
 * uncomplicated expressions held in a <code>String</code>. It keeps
 * the characters being parsed in a reusable <code>char</code> buffer
 * and provides a few helper methods to track and advance the read
 * position within that buffer. Two abstract methods, <code>init</code>
 * and <code>parse</code>, are left for subclasses to implement in
 * order to create a <code>Parser</code> for a specific grammar.
 *
 * @author Niall Gallagher
 */
public abstract class Parser {

   /**
    * This is the buffer that is being parsed. It may be longer than
    * the text currently held; only the first <code>count</code>
    * characters are valid.
    */
   protected char[] buf;

   /**
    * This represents the current read offset within the buffer.
    */
   protected int off;

   /**
    * This represents the number of valid characters in the buffer,
    * that is, the length of the text most recently given to the
    * <code>parse</code> method.
    */
   protected int count;

   /**
    * This is a no argument constructor for the <code>Parser</code>.
    * This will be invoked by each subclass of this object. It will
    * set the buffer to a zero length buffer so that when the
    * <code>ensureCapacity</code> method is used the length of the
    * buffer can be checked.
    */
   protected Parser() {
      this.buf = new char[0];
   }

   /**
    * This is used to parse the <code>String</code> given to it. The
    * buffer is first resized, if necessary, so that it can hold all
    * of the characters from the <code>String</code>, and the text is
    * copied in. The read offset is then moved back to the start of
    * the buffer and <code>init</code> is invoked so that multiple
    * parse invocations can be made with the same object. Finally the
    * abstract <code>parse</code> method is called to parse the
    * contents of the buffer. If the text is null nothing is done.
    *
    * @param text the <code>String</code> to be parsed with this
    * <code>Parser</code>
    */
   public void parse(String text) {
      if(text != null) {
         ensureCapacity(text.length());
         count = text.length();
         text.getChars(0, count, buf, 0);
         // Rewind before init() so a stale offset from a previous
         // parse can never leak into this one, even if the subclass
         // does not reset it itself; init() may still override it.
         off = 0;
         init();
         parse();
      }
   }

   /**
    * This ensures that the buffer has room for at least the given
    * number of characters. If the buffer is already at least that
    * large it is left untouched. Otherwise it is replaced with a new
    * array whose length is the larger of the requested minimum and
    * twice the current length. The contents of the old buffer are
    * not copied to the new one.
    *
    * @param min the minimum size needed to accommodate the characters
    */
   protected void ensureCapacity(int min) {
      if(buf.length < min) {
         int size = buf.length * 2;
         int max = Math.max(min, size);

         buf = new char[max];
      }
   }

   /**
    * This is used to determine if a given character is a whitespace
    * character. The space, horizontal tab, line feed and carriage
    * return characters are seen as whitespace characters.
    *
    * @param c the character that is being examined
    *
    * @return true if the character given is a whitespace character
    */
   protected boolean space(char c) {
      switch(c) {
      case ' ': case '\t':
      case '\n': case '\r':
         return true;
      default:
         return false;
      }
   }

   /**
    * This is used to determine whether or not a given character is
    * a digit character, that is, one of the characters in the range
    * '0' to '9' inclusive.
    *
    * @param c the character being examined by this method
    *
    * @return true if the character given is a digit character
    */
   protected boolean digit(char c) {
      return c >= '0' && c <= '9';
   }

   /**
    * This converts an uppercase character to its lowercase form.
    * Only the letters 'A' to 'Z' are converted; any other character,
    * including accented uppercase letters, is returned unchanged.
    *
    * @param c the <code>char</code> to be converted to lowercase
    *
    * @return the lowercase form of the given character
    */
   protected char toLower(char c) {
      if(c >= 'A' && c <= 'Z') {
         return (char)((c - 'A') + 'a');
      }
      return c;
   }

   /**
    * This is used to skip an arbitrary <code>String</code> within the
    * <code>char</code> buffer. It checks the length of the
    * <code>String</code> first to ensure that it will not read past
    * the valid characters. A comparison is then made between the
    * contents of the buffer at the current offset and the
    * <code>String</code>; the comparison ignores the case of the
    * letters 'A' to 'Z'. If the region in the buffer matches the
    * <code>String</code> then the offset within the buffer is
    * increased by the length of the <code>String</code> so that it
    * has effectively been skipped. If it does not match the offset
    * is left unchanged.
    *
    * @param text this is the <code>String</code> value to be skipped
    *
    * @return true if the <code>String</code> was skipped
    */
   protected boolean skip(String text) {
      int size = text.length();

      if(off + size > count) {
         return false;
      }
      for(int read = 0; read < size; read++) {
         char a = text.charAt(read);
         char b = buf[off + read];

         if(toLower(a) != toLower(b)) {
            return false;
         }
      }
      off += size;
      return true;
   }

   /**
    * This will initialize the <code>Parser</code> when it is ready
    * to parse a new <code>String</code>. This should reset any state
    * kept by the implementation so that it is ready to parse again.
    * The <code>init</code> method is invoked by the
    * <code>parse(String)</code> method after the text has been
    * copied into the buffer and the read offset has been set to
    * zero, and before the abstract <code>parse</code> method.
    */
   protected abstract void init();

   /**
    * This is the method that should be implemented to read the
    * buffer. This method should attempt to extract tokens from the
    * buffer so that these tokens may be used to determine the
    * semantics. This method is invoked after the <code>init</code>
    * method is invoked.
    */
   protected abstract void parse();
}