blob: db862f67be529cad18665a3ad75f4497f50a1264 [file] [log] [blame]
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* JFlex 1.4.3 *
* Copyright (C) 1998-2009 Gerwin Klein <lsf@jflex.de> *
* All rights reserved. *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License. See the file *
* COPYRIGHT for more information. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License along *
* with this program; if not, write to the Free Software Foundation, Inc., *
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
package JFlex;
import java.util.Vector;
/**
* Stores a regular expression of rules section in a JFlex-specification.
*
* This base class has no content other than its type.
*
* @author Gerwin Klein
* @version $Revision: 1.4.3 $, $Date: 2009/12/21 15:58:48 $
*/
public class RegExp {
/**
* The type of the regular expression. This field will be
* filled with values from class sym.java (generated by cup)
*/
int type;
/**
* Create a new regular expression of the specified type.
*
* @param type a value from the cup generated class sym.
*
* @see JFlex.sym
*/
public RegExp(int type) {
this.type = type;
}
/**
* Returns a String-representation of this regular expression
* with the specified indentation.
*
* @param tab a String that should contain only space characters and
* that is inserted in front of standard String-representation
* pf this object.
*/
public String print(String tab) {
return tab+toString();
}
/**
* Returns a String-representation of this regular expression
*/
public String toString() {
return "type = "+type;
}
/**
* Find out if this regexp is a char class or equivalent to one.
*
* @param macros for macro expansion
* @return true if the regexp is equivalent to a char class.
*/
public boolean isCharClass(Macros macros) {
RegExp1 unary;
RegExp2 binary;
switch (type) {
case sym.CHAR:
case sym.CHAR_I:
case sym.CCLASS:
case sym.CCLASSNOT:
return true;
case sym.BAR:
binary = (RegExp2) this;
return binary.r1.isCharClass(macros) && binary.r2.isCharClass(macros);
case sym.MACROUSE:
unary = (RegExp1) this;
return macros.getDefinition((String) unary.content).isCharClass(macros);
default: return false;
}
}
/**
* The approximate number of NFA states this expression will need (only
* works correctly after macro expansion and without negation)
*
* @param macros macro table for expansion
*/
public int size(Macros macros) {
RegExp1 unary;
RegExp2 binary;
RegExp content;
switch ( type ) {
case sym.BAR:
binary = (RegExp2) this;
return binary.r1.size(macros) + binary.r2.size(macros) + 2;
case sym.CONCAT:
binary = (RegExp2) this;
return binary.r1.size(macros) + binary.r2.size(macros);
case sym.STAR:
unary = (RegExp1) this;
content = (RegExp) unary.content;
return content.size(macros) + 2;
case sym.PLUS:
unary = (RegExp1) this;
content = (RegExp) unary.content;
return content.size(macros) + 2;
case sym.QUESTION:
unary = (RegExp1) this;
content = (RegExp) unary.content;
return content.size(macros);
case sym.BANG:
unary = (RegExp1) this;
content = (RegExp) unary.content;
return content.size(macros) * content.size(macros);
// this is only a very rough estimate (worst case 2^n)
// exact size too complicated (propably requires construction)
case sym.TILDE:
unary = (RegExp1) this;
content = (RegExp) unary.content;
return content.size(macros) * content.size(macros) * 3;
// see sym.BANG
case sym.STRING:
case sym.STRING_I:
unary = (RegExp1) this;
return ((String) unary.content).length()+1;
case sym.CHAR:
case sym.CHAR_I:
return 2;
case sym.CCLASS:
case sym.CCLASSNOT:
return 2;
case sym.MACROUSE:
unary = (RegExp1) this;
return macros.getDefinition((String) unary.content).size(macros);
}
throw new Error("unknown regexp type "+type);
}
/**
* @return the reverse of the specified string.
*/
public final static String revString(String s) {
StringBuffer b = new StringBuffer(s.length());
for (int i=s.length()-1; i >= 0; i--) {
b.append(s.charAt(i));
}
return b.toString();
}
/**
* Recursively convert tilde (upto) expressions into negation and star.
*
* @param macros the macro table for expansion.
* @return new RegExp equivalent to the current one, but without upto expressions.
*/
public final RegExp resolveTilde(Macros macros) {
RegExp1 unary;
RegExp2 binary;
RegExp content;
switch ( type ) {
case sym.BAR:
binary = (RegExp2) this;
return new RegExp2(sym.BAR, binary.r1.resolveTilde(macros),
binary.r2.resolveTilde(macros));
case sym.CONCAT:
binary = (RegExp2) this;
return new RegExp2(sym.CONCAT, binary.r1.resolveTilde(macros),
binary.r2.resolveTilde(macros));
case sym.STAR:
unary = (RegExp1) this;
content = (RegExp) unary.content;
return new RegExp1(sym.STAR, content.resolveTilde(macros));
case sym.PLUS:
unary = (RegExp1) this;
content = (RegExp) unary.content;
return new RegExp1(sym.PLUS, content.resolveTilde(macros));
case sym.QUESTION:
unary = (RegExp1) this;
content = (RegExp) unary.content;
return new RegExp1(sym.QUESTION, content.resolveTilde(macros));
case sym.BANG:
unary = (RegExp1) this;
content = (RegExp) unary.content;
return new RegExp1(sym.BANG, content.resolveTilde(macros));
case sym.TILDE:
// ~a = !([^]* a [^]*) a
// uses subexpression sharing
unary = (RegExp1) this;
content = ((RegExp) unary.content).resolveTilde(macros);
RegExp any_star = new RegExp1(sym.STAR, anyChar());
RegExp neg = new RegExp1(sym.BANG,
new RegExp2(sym.CONCAT, any_star,
new RegExp2(sym.CONCAT, content, any_star)));
return new RegExp2(sym.CONCAT, neg, content);
case sym.STRING:
case sym.STRING_I:
case sym.CHAR:
case sym.CHAR_I:
case sym.CCLASS:
case sym.CCLASSNOT:
unary = (RegExp1) this;
return new RegExp1(unary.type, unary.content);
case sym.MACROUSE:
unary = (RegExp1) this;
return macros.getDefinition((String) unary.content).resolveTilde(macros);
}
throw new Error("unknown regexp type "+type);
}
/**
* Returns a regexp that matches any character: <code>[^]</code>
* @return the regexp for <code>[^]</code>
*/
public RegExp anyChar() {
// FIXME: there is some code duplication here with the parser
Vector list = new Vector();
list.addElement(new Interval((char)0,CharClasses.maxChar));
return new RegExp1(sym.CCLASS,list);
}
/**
* Create a new regexp that matches the reverse text of this one.
*
* @return the reverse regexp
*/
public final RegExp rev(Macros macros) {
RegExp1 unary;
RegExp2 binary;
RegExp content;
switch ( type ) {
case sym.BAR:
binary = (RegExp2) this;
return new RegExp2(sym.BAR, binary.r1.rev(macros), binary.r2.rev(macros));
case sym.CONCAT:
binary = (RegExp2) this;
return new RegExp2(sym.CONCAT, binary.r2.rev(macros), binary.r1.rev(macros));
case sym.STAR:
unary = (RegExp1) this;
content = (RegExp) unary.content;
return new RegExp1(sym.STAR, content.rev(macros));
case sym.PLUS:
unary = (RegExp1) this;
content = (RegExp) unary.content;
return new RegExp1(sym.PLUS, content.rev(macros));
case sym.QUESTION:
unary = (RegExp1) this;
content = (RegExp) unary.content;
return new RegExp1(sym.QUESTION, content.rev(macros));
case sym.BANG:
unary = (RegExp1) this;
content = (RegExp) unary.content;
return new RegExp1(sym.BANG, content.rev(macros));
case sym.TILDE:
content = resolveTilde(macros);
return content.rev(macros);
case sym.STRING:
case sym.STRING_I:
unary = (RegExp1) this;
return new RegExp1(unary.type, revString((String) unary.content));
case sym.CHAR:
case sym.CHAR_I:
case sym.CCLASS:
case sym.CCLASSNOT:
unary = (RegExp1) this;
return new RegExp1(unary.type, unary.content);
case sym.MACROUSE:
unary = (RegExp1) this;
return macros.getDefinition((String) unary.content).rev(macros);
}
throw new Error("unknown regexp type "+type);
}
}