/*
 * Parser.java February 2001
 *
 * Copyright (C) 2001, Niall Gallagher <niallg@users.sf.net>
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

package org.simpleframework.common.parse;

/**
 * This <code>Parser</code> object is to be used as a simple template
 * for parsing uncomplicated expressions. This object is used to parse
 * a <code>String</code>. It copies the text into a reusable
 * <code>char</code> buffer and provides a few helper methods that can
 * be used to track and classify the data read from that buffer. There
 * are two abstract methods, <code>init</code> and <code>parse</code>,
 * which a subclass implements to create a <code>Parser</code> for a
 * given kind of <code>String</code>.
 *
 * @author Niall Gallagher
 */
public abstract class Parser {

    /**
     * This is the buffer that is being parsed. Only the first
     * <code>count</code> characters are valid, the array itself may
     * be larger as it is reused between parse invocations.
     */
    protected char[] buf;

    /**
     * This represents the current read offset within the buffer.
     */
    protected int off;

    /**
     * This represents the number of valid characters in the buffer,
     * that is the length of the text most recently given to the
     * <code>parse</code> method. It may be less than the length of
     * the <code>buf</code> array.
     */
    protected int count;

    /**
     * This is a no argument constructor for the <code>Parser</code>.
     * This will be invoked by each subclass of this object. It will
     * set the buffer to a zero length buffer so that when the
     * <code>ensureCapacity</code> method is used the length of the
     * buffer can be checked.
     */
    protected Parser() {
        this.buf = new char[0];
    }

    /**
     * This is used to parse the <code>String</code> given to it. This
     * will firstly ensure that the <code>char</code> buffer is large
     * enough to contain the characters from the <code>String</code>,
     * resizing it if necessary, and then copy the characters in. The
     * second step is to invoke the <code>init</code> method so that
     * the <code>Parser</code> is reset to a prepared state, which
     * allows multiple parse invocations to be made with one object.
     * Finally the no argument <code>parse</code> method is called to
     * parse the <code>char</code> buffer. If the text given is null
     * then this method does nothing at all.
     *
     * @param text the <code>String</code> to be parsed with this
     * <code>Parser</code>, a null value is ignored
     */
    public void parse(String text) {
        if (text != null) {
            ensureCapacity(text.length());
            count = text.length();
            text.getChars(0, count, buf, 0);
            init();
            parse();
        }
    }

    /**
     * This ensures that there is enough space in the buffer to hold
     * at least the given number of <code>char</code>'s. If the buffer
     * is already at least as large as min then it is not expanded at
     * all. Otherwise the buffer grows to the larger of min and twice
     * its current length, and the characters it already holds are
     * copied across so that nothing is lost when more characters
     * are added.
     *
     * @param min the minimum size needed to accommodate the characters
     */
    protected void ensureCapacity(int min) {
        if (buf.length < min) {
            int size = buf.length * 2;
            int max = Math.max(min, size);
            char[] temp = new char[max];

            // keep whatever has been buffered so far when growing
            System.arraycopy(buf, 0, temp, 0, buf.length);
            buf = temp;
        }
    }

    /**
     * This is used to determine if a given character is a space
     * character. The characters seen as whitespace by this method
     * are the space, horizontal tab, line feed and carriage return
     * characters.
     *
     * @param c the character that is being examined by this method
     *
     * @return true if the character given is a space character
     */
    protected boolean space(char c) {
        switch (c) {
        case ' ': case '\t':
        case '\n': case '\r':
            return true;
        default:
            return false;
        }
    }

    /**
     * This is used to determine whether or not a given character is
     * a digit character. Only the characters <code>'0'</code> to
     * <code>'9'</code> are considered to be digits.
     *
     * @param c the character being examined by this method
     *
     * @return true if the character given is a digit character
     */
    protected boolean digit(char c) {
        return c <= '9' && '0' <= c;
    }

    /**
     * This converts the given character to lowercase. Only the
     * uppercase letters <code>'A'</code> to <code>'Z'</code> are
     * converted, any other character, including an accented
     * uppercase letter, is returned unchanged.
     *
     * @param c the <code>char</code> to be converted to lowercase
     *
     * @return the lowercase form of the given character, or the
     * character itself if it is not in the range A to Z
     */
    protected char toLower(char c) {
        if (c >= 'A' && c <= 'Z') {
            return (char) ((c - 'A') + 'a');
        }
        return c;
    }

    /**
     * This is used to skip an arbitrary <code>String</code> within the
     * <code>char</code> buf. It checks the length of the <code>String</code>
     * first to ensure that it will not go beyond the valid characters
     * in the buffer. A comparison is then made between the contents of
     * the buffer at the current offset and the <code>String</code>,
     * ignoring the case of the letters A to Z. If the region in the
     * buffer matches the <code>String</code> then the offset within the
     * buffer is increased by the length of the <code>String</code> so
     * that it has effectively been skipped. If there is no match the
     * offset is left as it was.
     *
     * @param text this is the <code>String</code> value to be skipped,
     * a null value never matches
     *
     * @return true if the <code>String</code> was skipped
     */
    protected boolean skip(String text) {
        if (text == null) {
            return false;
        }
        int size = text.length();
        int read = 0;

        if (off + size > count) {
            return false;
        }
        while (read < size) {
            char a = text.charAt(read);
            char b = buf[off + read++];

            if (toLower(a) != toLower(b)) {
                return false;
            }
        }
        off += size;
        return true;
    }

    /**
     * This will initialize the <code>Parser</code> when it is ready
     * to parse a new <code>String</code>. This will reset the
     * <code>Parser</code> to a ready state. The <code>init</code>
     * method is invoked by the <code>Parser</code> when the
     * <code>parse</code> method is invoked, after the text has been
     * copied into the buffer.
     */
    protected abstract void init();

    /**
     * This is the method that should be implemented to read
     * the buf. This method should attempt to extract tokens
     * from the buffer so that these tokens may somehow be
     * used to determine the semantics. This method is invoked
     * after the <code>init</code> method is invoked.
     */
    protected abstract void parse();
}