src/com/sun/org/apache/regexp/internal/RE.java - platform/external/jetbrains/jdk8u_jaxp - Git at Google

 /*
  * reserved comment block
  * DO NOT REMOVE OR ALTER!
  */
 /*
  * Copyright 1999-2004 The Apache Software Foundation.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package com.sun.org.apache.regexp.internal;

 import java.io.Serializable;
 import java.util.Vector;

 /**
  * RE is an efficient, lightweight regular expression evaluator/matcher
  * class. Regular expressions are pattern descriptions which enable
  * sophisticated matching of strings.  In addition to being able to
  * match a string against a pattern, you can also extract parts of the
  * match.  This is especially useful in text parsing! Details on the
  * syntax of regular expression patterns are given below.
  *
  * <p>
  * To compile a regular expression (RE), you can simply construct an RE
  * matcher object from the string specification of the pattern, like this:
  *
  * <pre>
  *  RE r = new RE("a*b");
  * </pre>
  *
  * <p>
  * Once you have done this, you can call either of the RE.match methods to
  * perform matching on a String.  For example:
  *
  * <pre>
  *  boolean matched = r.match("aaaab");
  * </pre>
  *
  * will cause the boolean matched to be set to true because the
  * pattern "a*b" matches the string "aaaab".
  *
  * <p>
  * If you were interested in the <i>number</i> of a's which matched the
  * first part of our example expression, you could change the expression to
  * "(a*)b".  Then when you compiled the expression and matched it against
  * something like "xaaaab", you would get results like this:
  *
  * <pre>
  *  RE r = new RE("(a*)b");                  // Compile expression
  *  boolean matched = r.match("xaaaab");     // Match against "xaaaab"
  *
  *  String wholeExpr = r.getParen(0);        // wholeExpr will be 'aaaab'
  *  String insideParens = r.getParen(1);     // insideParens will be 'aaaa'
  *
  *  int startWholeExpr = r.getParenStart(0); // startWholeExpr will be index 1
  *  int endWholeExpr = r.getParenEnd(0);     // endWholeExpr will be index 6
  *  int lenWholeExpr = r.getParenLength(0);  // lenWholeExpr will be 5
  *
  *  int startInside = r.getParenStart(1);    // startInside will be index 1
  *  int endInside = r.getParenEnd(1);        // endInside will be index 5
  *  int lenInside = r.getParenLength(1);     // lenInside will be 4
  * </pre>
  *
  * You can also refer to the contents of a parenthesized expression
  * within a regular expression itself.  This is called a
  * 'backreference'.  The first backreference in a regular expression is
  * denoted by \1, the second by \2 and so on.  So the expression:
  *
  * <pre>
  *  ([0-9]+)=\1
  * </pre>
  *
  * will match any string of the form n=n (like 0=0 or 2=2).
  *
  * <p>
  * The full regular expression syntax accepted by RE is described here:
  *
  * <pre>
  *
  *  <b><font face=times roman>Characters</font></b>
  *
  *    <i>unicodeChar</i>   Matches any identical unicode character
  *    \                    Used to quote a meta-character (like '*')
  *    \\                   Matches a single '\' character
  *    \0nnn                Matches a given octal character
  *    \xhh                 Matches a given 8-bit hexadecimal character
  *    \\uhhhh              Matches a given 16-bit hexadecimal character
  *    \t                   Matches an ASCII tab character
  *    \n                   Matches an ASCII newline character
  *    \r                   Matches an ASCII return character
  *    \f                   Matches an ASCII form feed character
  *
  *
  *  <b><font face=times roman>Character Classes</font></b>
  *
  *    [abc]                Simple character class
  *    [a-zA-Z]             Character class with ranges
  *    [^abc]               Negated character class
  * </pre>
  *
  * <b>NOTE:</b> Incomplete ranges will be interpreted as &quot;starts
  * from zero&quot; or &quot;ends with last character&quot;.
  * <br>
  * I.e. [-a] is the same as [\\u0000-a], and [a-] is the same as [a-\\uFFFF],
  * [-] means &quot;all characters&quot;.
  *
  * <pre>
  *
  *  <b><font face=times roman>Standard POSIX Character Classes</font></b>
  *
  *    [:alnum:]            Alphanumeric characters.
  *    [:alpha:]            Alphabetic characters.
  *    [:blank:]            Space and tab characters.
  *    [:cntrl:]            Control characters.
  *    [:digit:]            Numeric characters.
  *    [:graph:]            Characters that are printable and are also visible.
  *                         (A space is printable, but not visible, while an
  *                         `a' is both.)
  *    [:lower:]            Lower-case alphabetic characters.
  *    [:print:]            Printable characters (characters that are not
  *                         control characters.)
  *    [:punct:]            Punctuation characters (characters that are not letter,
  *                         digits, control characters, or space characters).
  *    [:space:]            Space characters (such as space, tab, and formfeed,
  *                         to name a few).
  *    [:upper:]            Upper-case alphabetic characters.
  *    [:xdigit:]           Characters that are hexadecimal digits.
  *
  *
  *  <b><font face=times roman>Non-standard POSIX-style Character Classes</font></b>
  *
  *    [:javastart:]        Start of a Java identifier
  *    [:javapart:]         Part of a Java identifier
  *
  *
  *  <b><font face=times roman>Predefined Classes</font></b>
  *
  *    .         Matches any character other than newline
  *    \w        Matches a "word" character (alphanumeric plus "_")
  *    \W        Matches a non-word character
  *    \s        Matches a whitespace character
  *    \S        Matches a non-whitespace character
  *    \d        Matches a digit character
  *    \D        Matches a non-digit character
  *
  *
  *  <b><font face=times roman>Boundary Matchers</font></b>
  *
  *    ^         Matches only at the beginning of a line
  *    $         Matches only at the end of a line
  *    \b        Matches only at a word boundary
  *    \B        Matches only at a non-word boundary
  *
  *
  *  <b><font face=times roman>Greedy Closures</font></b>
  *
  *    A*        Matches A 0 or more times (greedy)
  *    A+        Matches A 1 or more times (greedy)
  *    A?        Matches A 1 or 0 times (greedy)
  *    A{n}      Matches A exactly n times (greedy)
  *    A{n,}     Matches A at least n times (greedy)
  *    A{n,m}    Matches A at least n but not more than m times (greedy)
  *
  *
  *  <b><font face=times roman>Reluctant Closures</font></b>
  *
  *    A*?       Matches A 0 or more times (reluctant)
  *    A+?       Matches A 1 or more times (reluctant)
  *    A??       Matches A 0 or 1 times (reluctant)
  *
  *
  *  <b><font face=times roman>Logical Operators</font></b>
  *
  *    AB        Matches A followed by B
  *    A|B       Matches either A or B
  *    (A)       Used for subexpression grouping
  *   (?:A)      Used for subexpression clustering (just like grouping but
  *              no backrefs)
  *
  *
  *  <b><font face=times roman>Backreferences</font></b>
  *
  *    \1    Backreference to 1st parenthesized subexpression
  *    \2    Backreference to 2nd parenthesized subexpression
  *    \3    Backreference to 3rd parenthesized subexpression
  *    \4    Backreference to 4th parenthesized subexpression
  *    \5    Backreference to 5th parenthesized subexpression
  *    \6    Backreference to 6th parenthesized subexpression
  *    \7    Backreference to 7th parenthesized subexpression
  *    \8    Backreference to 8th parenthesized subexpression
  *    \9    Backreference to 9th parenthesized subexpression
  * </pre>
  *
  * <p>
  * All closure operators (+, *, ?, {m,n}) are greedy by default, meaning
  * that they match as many elements of the string as possible without
  * causing the overall match to fail.  If you want a closure to be
  * reluctant (non-greedy), you can simply follow it with a '?'.  A
  * reluctant closure will match as few elements of the string as
  * possible when finding matches.  {m,n} closures don't currently
  * support reluctancy.
  *
  * <p>
  * <b><font face="times roman">Line terminators</font></b>
  * <br>
  * A line terminator is a one- or two-character sequence that marks
  * the end of a line of the input character sequence. The following
  * are recognized as line terminators:
  * <ul>
  * <li>A newline (line feed) character ('\n'),</li>
  * <li>A carriage-return character followed immediately by a newline character ("\r\n"),</li>
  * <li>A standalone carriage-return character ('\r'),</li>
  * <li>A next-line character ('\u0085'),</li>
  * <li>A line-separator character ('\u2028'), or</li>
  * <li>A paragraph-separator character ('\u2029').</li>
  * </ul>
  *
  * <p>
  * RE runs programs compiled by the RECompiler class.  But the RE
  * matcher class does not include the actual regular expression compiler
  * for reasons of efficiency.  In fact, if you want to pre-compile one
  * or more regular expressions, the 'recompile' class can be invoked
  * from the command line to produce compiled output like this:
  *
  * <pre>
  *    // Pre-compiled regular expression "a*b"
  *    char[] re1Instructions =
  *    {
  *        0x007c, 0x0000, 0x001a, 0x007c, 0x0000, 0x000d, 0x0041,
  *        0x0001, 0x0004, 0x0061, 0x007c, 0x0000, 0x0003, 0x0047,
  *        0x0000, 0xfff6, 0x007c, 0x0000, 0x0003, 0x004e, 0x0000,
  *        0x0003, 0x0041, 0x0001, 0x0004, 0x0062, 0x0045, 0x0000,
  *        0x0000,
  *    };
  *
  *
  *    REProgram re1 = new REProgram(re1Instructions);
  * </pre>
  *
  * You can then construct a regular expression matcher (RE) object from
  * the pre-compiled expression re1 and thus avoid the overhead of
  * compiling the expression at runtime. If you require more dynamic
  * regular expressions, you can construct a single RECompiler object and
  * re-use it to compile each expression. Similarly, you can change the
  * program run by a given matcher object at any time. However, RE and
  * RECompiler are not threadsafe (for efficiency reasons, and because
  * requiring thread safety in this class is deemed to be a rare
  * requirement), so you will need to construct a separate compiler or
  * matcher object for each thread (unless you do thread synchronization
  * yourself). Once an expression has been compiled into an REProgram object,
  * that REProgram can be safely shared across multiple threads and RE objects.
  *
  * <br><p><br>
  *
  * <font color="red">
  * <i>ISSUES:</i>
  *
  * <ul>
  *  <li>com.weusours.util.re is not currently compatible with all
  *      standard POSIX regcomp flags</li>
  *  <li>com.weusours.util.re does not support POSIX equivalence classes
  *      ([=foo=] syntax) (I18N/locale issue)</li>
  *  <li>com.weusours.util.re does not support nested POSIX character
  *      classes (definitely should, but not completely trivial)</li>
  *  <li>com.weusours.util.re Does not support POSIX character collation
  *      concepts ([.foo.] syntax) (I18N/locale issue)</li>
  *  <li>Should there be different matching styles (simple, POSIX, Perl etc?)</li>
  *  <li>Should RE support character iterators (for backwards RE matching!)?</li>
  *  <li>Should RE support reluctant {m,n} closures (does anyone care)?</li>
  *  <li>Not *all* possibilities are considered for greediness when backreferences
  *      are involved (as POSIX suggests should be the case).  The POSIX RE
  *      "(ac*)c*d[ac]*\1", when matched against "acdacaa" should yield a match
  *      of acdacaa where \1 is "a".  This is not the case in this RE package,
  *      and actually Perl doesn't go to this extent either!  Until someone
  *      actually complains about this, I'm not sure it's worth "fixing".
  *      If it ever is fixed, test #137 in RETest.txt should be updated.</li>
  * </ul>
  *
  * </font>
  *
  * @see recompile
  * @see RECompiler
  *
  * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
  * @author <a href="mailto:ts@sch-fer.de">Tobias Sch&auml;fer</a>
  */
 public class RE implements Serializable
 {
     /**
      * Specifies normal, case-sensitive matching behaviour (no flag bits set).
      */
     public static final int MATCH_NORMAL          = 0x0000;

     /**
      * Flag to indicate that matching should be case-independent (folded).
      * Each non-zero MATCH_* value occupies its own bit, so several flags can
      * be combined with bitwise OR into a single matchFlags value.
      */
     public static final int MATCH_CASEINDEPENDENT = 0x0001;

     /**
      * Flag to indicate that newlines should match as BOL/EOL (^ and $).
      */
     public static final int MATCH_MULTILINE       = 0x0002;

     /**
      * Flag to indicate that all input is considered a single body of text,
      * i.e. newlines are matched by '.'.
      */
     public static final int MATCH_SINGLELINE      = 0x0004;

     /************************************************
      *                                              *
      * The format of a node in a program is:        *
      *                                              *
      * [ OPCODE ] [ OPDATA ] [ OPNEXT ] [ OPERAND ] *
      *                                              *
      * char OPCODE - instruction                    *
      * char OPDATA - modifying data                 *
      * char OPNEXT - next node (relative offset)    *
      *                                              *
      ************************************************/

     // Note: although OPNEXT is stored as a char, matchNodes reads it back through
     // a (short) cast, so the relative offset is signed and may point backwards.

                  //   Opcode              Char       Opdata/Operand  Meaning
                  //   ----------          ---------- --------------- --------------------------------------------------
     static final char OP_END              = 'E';  //                 end of program
     static final char OP_BOL              = '^';  //                 match only if at beginning of line
     static final char OP_EOL              = '$';  //                 match only if at end of line
     static final char OP_ANY              = '.';  //                 match any single character except newline
     static final char OP_ANYOF            = '[';  // count/ranges    match any char in the list of ranges
     static final char OP_BRANCH           = '|';  // node            match this alternative or the next one
     static final char OP_ATOM             = 'A';  // length/string   length of string followed by string itself
     static final char OP_STAR             = '*';  // node            kleene closure
     static final char OP_PLUS             = '+';  // node            positive closure
     static final char OP_MAYBE            = '?';  // node            optional closure
     static final char OP_ESCAPE           = '\\'; // escape          special escape code char class (escape is E_* code)
     static final char OP_OPEN             = '(';  // number          nth opening paren
     static final char OP_OPEN_CLUSTER     = '<';  //                 opening cluster
     static final char OP_CLOSE            = ')';  // number          nth closing paren
     static final char OP_CLOSE_CLUSTER    = '>';  //                 closing cluster
     static final char OP_BACKREF          = '#';  // number          reference nth already matched parenthesized string
     static final char OP_GOTO             = 'G';  //                 nothing but a (back-)pointer
     static final char OP_NOTHING          = 'N';  //                 match null string such as in '(a|)'
     static final char OP_RELUCTANTSTAR    = '8';  // none/expr       reluctant '*' (mnemonic for char is unshifted '*')
     static final char OP_RELUCTANTPLUS    = '=';  // none/expr       reluctant '+' (mnemonic for char is unshifted '+')
     static final char OP_RELUCTANTMAYBE   = '/';  // none/expr       reluctant '?' (mnemonic for char is unshifted '?')
     static final char OP_POSIXCLASS       = 'P';  // classid         one of the posix character classes
     // Escape codes (the E_* value carried by an OP_ESCAPE node; each value is the
     // letter that follows the backslash in the pattern, e.g. 'w' for \w)
     static final char E_ALNUM             = 'w';  // Alphanumeric
     static final char E_NALNUM            = 'W';  // Non-alphanumeric
     static final char E_BOUND             = 'b';  // Word boundary
     static final char E_NBOUND            = 'B';  // Non-word boundary
     static final char E_SPACE             = 's';  // Whitespace
     static final char E_NSPACE            = 'S';  // Non-whitespace
     static final char E_DIGIT             = 'd';  // Digit
     static final char E_NDIGIT            = 'D';  // Non-digit

     // Posix character classes (the class id carried by an OP_POSIXCLASS node)
     static final char POSIX_CLASS_ALNUM   = 'w';  // Alphanumerics
     static final char POSIX_CLASS_ALPHA   = 'a';  // Alphabetics
     static final char POSIX_CLASS_BLANK   = 'b';  // Blanks
     static final char POSIX_CLASS_CNTRL   = 'c';  // Control characters
     static final char POSIX_CLASS_DIGIT   = 'd';  // Digits
     static final char POSIX_CLASS_GRAPH   = 'g';  // Graphic characters
     static final char POSIX_CLASS_LOWER   = 'l';  // Lowercase characters
     static final char POSIX_CLASS_PRINT   = 'p';  // Printable characters
     static final char POSIX_CLASS_PUNCT   = '!';  // Punctuation
     static final char POSIX_CLASS_SPACE   = 's';  // Spaces
     static final char POSIX_CLASS_UPPER   = 'u';  // Uppercase characters
     static final char POSIX_CLASS_XDIGIT  = 'x';  // Hexadecimal digits
     static final char POSIX_CLASS_JSTART  = 'j';  // Java identifier start
     static final char POSIX_CLASS_JPART   = 'k';  // Java identifier part

     // Limits
     // maxNode is also passed to matchNodes as the 'lastNode' bound when the rest
     // of the program should be matched without an earlier cut-off.
     static final int maxNode  = 65536;            // Maximum number of nodes in a program
     // MAX_PAREN is the value maxParen falls back to when no program is set or the
     // program reports maxParens == -1 (see setProgram).
     static final int MAX_PAREN = 16;              // Number of paren pairs (only 9 can be backrefs)

     // Node layout constants (offsets are in chars, relative to the node's first char)
     static final int offsetOpcode = 0;            // Opcode offset (first character)
     static final int offsetOpdata = 1;            // Opdata offset (second char)
     static final int offsetNext   = 2;            // Next index offset (third char)
     static final int nodeSize     = 3;            // Node size (in chars)

     // State of current program
     REProgram program;                            // Compiled regular expression 'program'
     transient CharacterIterator search;           // The string being matched against
     int matchFlags;                               // Match behaviour flags
     int maxParen = MAX_PAREN;                     // Length used for startn/endn; updated by setProgram()

     // Parenthesized subexpressions
     transient int parenCount;                     // Number of subexpressions matched (num open parens + 1)
     transient int start0;                         // Cache of start[0]
     transient int end0;                           // Cache of end[0]
     transient int start1;                         // Cache of start[1]
     transient int end1;                           // Cache of end[1]
     transient int start2;                         // Cache of start[2]
     transient int end2;                           // Cache of end[2]
     transient int[] startn;                       // Lazy-alloced array of sub-expression starts
     transient int[] endn;                         // Lazy-alloced array of sub-expression ends

     // Backreferences
     transient int[] startBackref;                 // Lazy-alloced array of backref starts
     transient int[] endBackref;                   // Lazy-alloced array of backref ends

     /**
      * Constructs a regular expression matcher from a String by compiling it
      * using a new instance of RECompiler.  If you will be compiling many
      * expressions, you may prefer to use a single RECompiler object instead.
      * Matching uses the default MATCH_NORMAL behaviour.
      *
      * @param pattern The regular expression pattern to compile.
      * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
      * @see RECompiler
      * @see recompile
      */
     public RE(String pattern) throws RESyntaxException
     {
         // Delegate to the (pattern, flags) constructor with no flag bits set
         this(pattern, MATCH_NORMAL);
     }

     /**
      * Constructs a regular expression matcher from a String by compiling it
      * using a new instance of RECompiler.  If you will be compiling many
      * expressions, you may prefer to use a single RECompiler object instead.
      *
      * @param pattern The regular expression pattern to compile.
      * @param matchFlags The matching style: one or more of the RE.MATCH_* flags
      * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
      * @see RECompiler
      * @see recompile
      */
     public RE(String pattern, int matchFlags) throws RESyntaxException
     {
         // Compile with a throw-away compiler; this installs the program with MATCH_NORMAL ...
         this(new RECompiler().compile(pattern));
         // ... and the requested flags are then applied on top.
         setMatchFlags(matchFlags);
     }

     /**
      * Construct a matcher for a pre-compiled regular expression from program
      * (bytecode) data.  Permits special flags to be passed in to modify matching
      * behaviour.
      *
      * @param program Compiled regular expression program (see RECompiler and/or recompile);
      *                may be null, in which case no program is installed yet
      * @param matchFlags One or more of the RE match behaviour flags (RE.MATCH_*):
      *
      * <pre>
      *   MATCH_NORMAL              // Normal (case-sensitive) matching
      *   MATCH_CASEINDEPENDENT     // Case folded comparisons
      *   MATCH_MULTILINE           // Newline matches as BOL/EOL
      *   MATCH_SINGLELINE          // Newlines are matched by .
      * </pre>
      *
      * @see RECompiler
      * @see REProgram
      * @see recompile
      */
     public RE(REProgram program, int matchFlags)
     {
         // setProgram also (re)derives maxParen from the program
         setProgram(program);
         setMatchFlags(matchFlags);
     }

     /**
      * Construct a matcher for a pre-compiled regular expression from program
      * (bytecode) data, using the default MATCH_NORMAL behaviour.
      *
      * @param program Compiled regular expression program
      * @see RECompiler
      * @see recompile
      */
     public RE(REProgram program)
     {
         this(program, MATCH_NORMAL);
     }

     /**
      * Constructs a regular expression matcher with no initial program.
      * This is likely to be an uncommon practice, but is still supported.
      * A program can be supplied later through setProgram.
      */
     public RE()
     {
         // The cast selects the (REProgram, int) overload; a bare null would be
         // ambiguous with the (String, int) constructor.
         this((REProgram)null, MATCH_NORMAL);
     }

     /**
      * Converts a 'simplified' regular expression to a full regular expression.
      * <p>
      * In the simplified syntax only '*' is special: it is expanded to ".*".
      * Every other character that is a metacharacter of the full syntax
      * (. [ ] \ + ? { } $ ^ | ( )) is prefixed with a backslash so that it is
      * matched literally.  All remaining characters are copied unchanged.
      *
      * @param pattern The pattern to convert (must not be null)
      * @return The full regular expression
      */
     public static String simplePatternToFullRegularExpression(String pattern)
     {
         final int length = pattern.length();

         // Each input char yields at most two output chars, so this never regrows.
         // StringBuilder: the buffer is method-local, no synchronization is needed.
         StringBuilder buf = new StringBuilder(length * 2);
         for (int i = 0; i < length; i++)
         {
             char c = pattern.charAt(i);
             if (c == '*')
             {
                 // The only wildcard of the simplified syntax
                 buf.append(".*");
             }
             else
             {
                 // Quote anything the full syntax would otherwise interpret
                 if (".[]\\+?{}$^|()".indexOf(c) >= 0)
                 {
                     buf.append('\\');
                 }
                 buf.append(c);
             }
         }
         return buf.toString();
     }

     /**
      * Sets match behaviour flags which alter the way RE does matching.
      * The value replaces any previously set flags.
      *
      * @param matchFlags One or more of the RE match behaviour flags (RE.MATCH_*):
      *
      * <pre>
      *   MATCH_NORMAL              // Normal (case-sensitive) matching
      *   MATCH_CASEINDEPENDENT     // Case folded comparisons
      *   MATCH_MULTILINE           // Newline matches as BOL/EOL
      *   MATCH_SINGLELINE          // Newlines are matched by .
      * </pre>
      */
     public void setMatchFlags(int matchFlags)
     {
         this.matchFlags = matchFlags;
     }

     /**
      * Returns the current match behaviour flags.
      *
      * <pre>
      *   MATCH_NORMAL              // Normal (case-sensitive) matching
      *   MATCH_CASEINDEPENDENT     // Case folded comparisons
      *   MATCH_MULTILINE           // Newline matches as BOL/EOL
      *   MATCH_SINGLELINE          // Newlines are matched by .
      * </pre>
      *
      * @return Current match behaviour flags (RE.MATCH_*), exactly as last
      *         passed to setMatchFlags.
      * @see #setMatchFlags
      */
     public int getMatchFlags()
     {
         return matchFlags;
     }

     /**
      * Installs the regular expression program this matcher will run and
      * re-derives the paren capacity that goes with it.
      *
      * @param program Regular expression program compiled by RECompiler; may be null.
      * @see RECompiler
      * @see REProgram
      * @see recompile
      */
     public void setProgram(REProgram program)
     {
         this.program = program;

         // Use the program's own paren count when it has one; a missing program
         // or a count of -1 means "unknown", so fall back to the built-in limit.
         final boolean programHasLimit = (program != null) && (program.maxParens != -1);
         this.maxParen = programHasLimit ? program.maxParens : MAX_PAREN;
     }

     /**
      * Returns the current regular expression program in use by this matcher object.
      *
      * @return Regular expression program, or null if none has been set
      * @see #setProgram
      */
     public REProgram getProgram()
     {
         return program;
     }

     /**
      * Returns the number of parenthesized subexpressions available after a successful match.
      * Paren indices passed to the getParen* accessors must be below this value
      * to yield a result.
      *
      * @return Number of available parenthesized subexpressions
      */
     public int getParenCount()
     {
         return parenCount;
     }

     /**
      * Extracts the text captured by a parenthesized subexpression of the
      * most recent match.
      *
      * @param which Index of the subexpression
      * @return The captured text, or null if the index is not below the paren
      *         count or the subexpression has no recorded start
      */
     public String getParen(int which)
     {
         // Unknown paren level
         if (which >= parenCount)
         {
             return null;
         }

         // A negative start marks a subexpression without a recorded position
         final int from = getParenStart(which);
         if (from < 0)
         {
             return null;
         }

         return search.substring(from, getParenEnd(which));
     }

     /**
      * Looks up where a given paren level starts in the input.
      * Levels 0 to 2 are kept in dedicated fields; higher levels live in the
      * lazily allocated startn array.
      *
      * @param which Index of the subexpression
      * @return Start index in the input, or -1 if the index is not below the paren count
      */
     public final int getParenStart(int which)
     {
         if (which >= parenCount)
         {
             return -1;
         }

         if (which == 0)
         {
             return start0;
         }
         if (which == 1)
         {
             return start1;
         }
         if (which == 2)
         {
             return start2;
         }

         // Everything else is stored in the overflow array
         if (startn == null)
         {
             allocParens();
         }
         return startn[which];
     }

     /**
      * Looks up where a given paren level ends in the input.
      * Levels 0 to 2 are kept in dedicated fields; higher levels live in the
      * lazily allocated endn array.
      *
      * @param which Index of the subexpression
      * @return End index in the input, or -1 if the index is not below the paren count
      */
     public final int getParenEnd(int which)
     {
         if (which >= parenCount)
         {
             return -1;
         }

         if (which == 0)
         {
             return end0;
         }
         if (which == 1)
         {
             return end1;
         }
         if (which == 2)
         {
             return end2;
         }

         // Everything else is stored in the overflow array
         if (endn == null)
         {
             allocParens();
         }
         return endn[which];
     }

     /**
      * Computes the length of a given paren level as the difference between
      * its end and start indices.
      *
      * @param which Index of the subexpression
      * @return Number of characters in the parenthesized subexpression, or -1
      *         if the index is not below the paren count
      */
     public final int getParenLength(int which)
     {
         return (which < parenCount)
                 ? getParenEnd(which) - getParenStart(which)
                 : -1;
     }

     /**
      * Records the start index of a paren level.  Requests for a level that
      * is not below the current paren count are silently ignored.
      *
      * @param which Which paren level
      * @param i Index in input array
      */
     protected final void setParenStart(int which, int i)
     {
         if (which >= parenCount)
         {
             return;
         }

         if (which == 0)
         {
             start0 = i;
         }
         else if (which == 1)
         {
             start1 = i;
         }
         else if (which == 2)
         {
             start2 = i;
         }
         else
         {
             // Levels beyond the cached ones go to the overflow array
             if (startn == null)
             {
                 allocParens();
             }
             startn[which] = i;
         }
     }

     /**
      * Records the end index of a paren level.  Requests for a level that
      * is not below the current paren count are silently ignored.
      *
      * @param which Which paren level
      * @param i Index in input array
      */
     protected final void setParenEnd(int which, int i)
     {
         if (which >= parenCount)
         {
             return;
         }

         if (which == 0)
         {
             end0 = i;
         }
         else if (which == 1)
         {
             end1 = i;
         }
         else if (which == 2)
         {
             end2 = i;
         }
         else
         {
             // Levels beyond the cached ones go to the overflow array
             if (endn == null)
             {
                 allocParens();
             }
             endn[which] = i;
         }
     }

     /**
      * Reports an internal error condition by throwing an Error.  Such a
      * condition probably results from a bug in the regular expression compiler
      * (or possibly data corruption) and should be very rare in practice.
      *
      * @param s Error description, appended to the "RE internal error: " prefix
      * @throws Error always
      */
     protected void internalError(String s) throws Error
     {
         final String message = "RE internal error: " + s;
         throw new Error(message);
     }

     /**
      * Lazily creates the start/end arrays for subexpressions, sized by
      * maxParen, with every slot initialised to the invalid index -1.
      */
     private final void allocParens()
     {
         // Fresh arrays for subexpression boundaries
         startn = new int[maxParen];
         endn = new int[maxParen];

         // -1 marks a subexpression position as invalid
         java.util.Arrays.fill(startn, -1);
         java.util.Arrays.fill(endn, -1);
     }

     /**
      * Try to match a string against a subset of nodes in the program.
      *
      * <p>Starting at <code>firstNode</code>, each node's opcode, next-node offset and
      * operand are decoded from <code>program.instruction</code> and interpreted against
      * the input held in <code>search</code>.  Simple nodes (atoms, character classes,
      * escapes, anchors) either fail immediately by returning -1 or succeed, possibly
      * advancing the input index, and the loop continues with the next node.  Nodes
      * that introduce a choice or a group (branches, the reluctant operators,
      * open/close parens) call this method recursively to match the remainder of the
      * program up to <code>maxNode</code> and return the result of that call directly.
      *
      * <p>Side effects visible in this method: it stores into <code>startBackref</code>
      * and <code>endBackref</code> when the program is flagged as having back
      * references, it may raise <code>parenCount</code>, and it records group
      * boundaries through <code>setParenStart</code> and <code>setParenEnd</code>.
      *
      * @param firstNode Node to start at in program
      * @param lastNode  Last valid node (used for matching a subexpression without
      *                  matching the rest of the program as well).  The loop runs
      *                  while the current node index is strictly less than this value.
      * @param idxStart  Starting position in character array
      * @return Final input array index if match succeeded.  -1 if not.
      */
     protected int matchNodes(int firstNode, int lastNode, int idxStart)
     {
         // Our current place in the string
         int idx = idxStart;

         // Loop while node is valid
         int next, opcode, opdata;
         int idxNew;
         char[] instruction = program.instruction;
         for (int node = firstNode; node < lastNode; )
         {
             // Decode the node header.  The stored next-offset is a char that is
             // reinterpreted as a signed 16-bit value, so it is relative to this
             // node and may be negative.
             opcode = instruction[node + offsetOpcode];
             next   = node + (short)instruction[node + offsetNext];
             opdata = instruction[node + offsetOpdata];

             switch (opcode)
             {
                 case OP_RELUCTANTMAYBE:
                     {
                         // The rest of the program is tried first; only if that fails is the
                         // operand (node + nodeSize up to next) matched, and then the rest is
                         // tried once more.  'once' limits the operand to a single use.
                         int once = 0;
                         do
                         {
                             // Try to match the rest without using the reluctant subexpr
                             if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
                             {
                                 return idxNew;
                             }
                         }
                         while ((once++ == 0) && (idx = matchNodes(node + nodeSize, next, idx)) != -1);
                         return -1;
                     }

                 case OP_RELUCTANTPLUS:
                     // The operand must match before the rest is attempted; after every
                     // successful operand match the rest is tried, and the first success wins.
                     while ((idx = matchNodes(node + nodeSize, next, idx)) != -1)
                     {
                         // Try to match the rest without using the reluctant subexpr
                         if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
                         {
                             return idxNew;
                         }
                     }
                     return -1;

                 case OP_RELUCTANTSTAR:
                     // Like OP_RELUCTANTPLUS, except that the rest is tried before the
                     // operand has been matched at all.
                     do
                     {
                         // Try to match the rest without using the reluctant subexpr
                         if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
                         {
                             return idxNew;
                         }
                     }
                     while ((idx = matchNodes(node + nodeSize, next, idx)) != -1);
                     return -1;

                 case OP_OPEN:

                     // Match subexpression
                     // opdata is the group number.  The back-reference start is stored
                     // before recursing so that an OP_BACKREF reached during the recursive
                     // call can read it.
                     if ((program.flags & REProgram.OPT_HASBACKREFS) != 0)
                     {
                         startBackref[opdata] = idx;
                     }
                     if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
                     {
                         // Increase valid paren count
                         if ((opdata + 1) > parenCount)
                         {
                             parenCount = opdata + 1;
                         }

                         // Don't set paren if already set later on
                         // (this code runs after the recursive call has returned, so a
                         // deeper visit to the same group may already have stored a value)
                         if (getParenStart(opdata) == -1)
                         {
                             setParenStart(opdata, idx);
                         }
                     }
                     return idxNew;

                 case OP_CLOSE:

                     // Done matching subexpression
                     // Mirrors OP_OPEN, recording the end position of group 'opdata'.
                     if ((program.flags & REProgram.OPT_HASBACKREFS) != 0)
                     {
                         endBackref[opdata] = idx;
                     }
                     if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
                     {
                         // Increase valid paren count
                         if ((opdata + 1) > parenCount)
                         {
                             parenCount = opdata + 1;
                         }

                         // Don't set paren if already set later on
                         if (getParenEnd(opdata) == -1)
                         {
                             setParenEnd(opdata, idx);
                         }
                     }
                     return idxNew;

                 case OP_OPEN_CLUSTER:
                 case OP_CLOSE_CLUSTER:
                     // starting or ending the matching of a subexpression which has no backref.
                     return matchNodes( next, maxNode, idx );

                 case OP_BACKREF:
                     {
                         // Get the start and end of the backref
                         int s = startBackref[opdata];
                         int e = endBackref[opdata];

                         // We don't know the backref yet
                         // NOTE(review): matchAt allocates both arrays with new int[maxParen],
                         // i.e. zero-filled, so an unvisited group reads as s == e == 0 and is
                         // handled by the empty case below rather than here - confirm whether
                         // that is intended.
                         if (s == -1 || e == -1)
                         {
                             return -1;
                         }

                         // The backref captured an empty string: nothing to compare
                         if (s == e)
                         {
                             break;
                         }

                         // Get the length of the backref
                         int l = e - s;

                         // If there's not enough input left, give up.
                         if (search.isEnd(idx + l - 1))
                         {
                             return -1;
                         }

                         // Case fold the backref?
                         final boolean caseFold =
                             ((matchFlags & MATCH_CASEINDEPENDENT) != 0);
                         // Compare backref to input
                         // (idx is advanced as the characters are consumed)
                         for (int i = 0; i < l; i++)
                         {
                             if (compareChars(search.charAt(idx++), search.charAt(s + i), caseFold) != 0)
                             {
                                 return -1;
                             }
                         }
                     }
                     break;

                 case OP_BOL:

                     // Fail if we're not at the start of the string
                     if (idx != 0)
                     {
                         // If we're multiline matching, we could still be at the start of a line
                         if ((matchFlags & MATCH_MULTILINE) == MATCH_MULTILINE)
                         {
                             // If not at start of line, give up
                             // (idx != 0 here; the idx <= 0 test keeps isNewline from being
                             // called with a negative index)
                             if (idx <= 0 || !isNewline(idx - 1)) {
                                 return -1;
                             } else {
                                 break;
                             }
                         }
                         return -1;
                     }
                     break;

                 case OP_EOL:

                     // If we're not at the end of string
                     // (the isEnd(0) test presumably lets an empty input count as being at
                     // its end - confirm against the CharacterIterator implementation)
                     if (!search.isEnd(0) && !search.isEnd(idx))
                     {
                         // If we're multi-line matching
                         if ((matchFlags & MATCH_MULTILINE) == MATCH_MULTILINE)
                         {
                             // Give up if we're not at the end of a line
                             if (!isNewline(idx)) {
                                 return -1;
                             } else {
                                 break;
                             }
                         }
                         return -1;
                     }
                     break;

                 case OP_ESCAPE:

                     // Which escape?
                     switch (opdata)
                     {
                         // Word boundary match
                         case E_NBOUND:
                         case E_BOUND:
                             {
                                 // Positions outside the input are treated as '\n', which is
                                 // not a letter or digit.
                                 char cLast = ((idx == 0) ? '\n' : search.charAt(idx - 1));
                                 char cNext = ((search.isEnd(idx)) ? '\n' : search.charAt(idx));
                                 // Both neighbours in the same class means "not a boundary":
                                 // that fails E_BOUND, and the opposite case fails E_NBOUND.
                                 // Unlike the E_ALNUM test below, '_' gets no special
                                 // treatment here.
                                 if ((Character.isLetterOrDigit(cLast) == Character.isLetterOrDigit(cNext)) == (opdata == E_BOUND))
                                 {
                                     return -1;
                                 }
                             }
                             // Zero-width: idx is not advanced
                             break;

                         // Word character, digit and whitespace escapes, plus their negations
                         case E_ALNUM:
                         case E_NALNUM:
                         case E_DIGIT:
                         case E_NDIGIT:
                         case E_SPACE:
                         case E_NSPACE:

                             // Give up if out of input
                             if (search.isEnd(idx))
                             {
                                 return -1;
                             }

                             char c = search.charAt(idx);

                             // Switch on escape
                             // Each pair shares one test; comparing the result with
                             // (opdata == positive form) inverts it for the negated escape.
                             switch (opdata)
                             {
                                 case E_ALNUM:
                                 case E_NALNUM:
                                     if (!((Character.isLetterOrDigit(c) || c == '_') == (opdata == E_ALNUM)))
                                     {
                                         return -1;
                                     }
                                     break;

                                 case E_DIGIT:
                                 case E_NDIGIT:
                                     if (!(Character.isDigit(c) == (opdata == E_DIGIT)))
                                     {
                                         return -1;
                                     }
                                     break;

                                 case E_SPACE:
                                 case E_NSPACE:
                                     if (!(Character.isWhitespace(c) == (opdata == E_SPACE)))
                                     {
                                         return -1;
                                     }
                                     break;
                             }
                             // The escape consumed one character
                             idx++;
                             break;

                         default:
                             internalError("Unrecognized escape '" + opdata + "'");
                     }
                     break;

                 case OP_ANY:

                     if ((matchFlags & MATCH_SINGLELINE) == MATCH_SINGLELINE) {
                         // Match anything
                         if (search.isEnd(idx))
                         {
                             return -1;
                         }
                     }
                     else
                     {
                         // Match anything but a newline
                         if (search.isEnd(idx) || isNewline(idx))
                         {
                             return -1;
                         }
                     }
                     idx++;
                     break;

                 case OP_ATOM:
                     {
                         // Match an atom value
                         if (search.isEnd(idx))
                         {
                             return -1;
                         }

                         // Get length of atom and starting index
                         // (the atom's characters are stored in the instruction array
                         // immediately after this node's header)
                         int lenAtom = opdata;
                         int startAtom = node + nodeSize;

                         // Give up if not enough input remains to have a match
                         if (search.isEnd(lenAtom + idx - 1))
                         {
                             return -1;
                         }

                         // Match atom differently depending on casefolding flag
                         final boolean caseFold =
                             ((matchFlags & MATCH_CASEINDEPENDENT) != 0);

                         for (int i = 0; i < lenAtom; i++)
                         {
                             if (compareChars(search.charAt(idx++), instruction[startAtom + i], caseFold) != 0)
                             {
                                 return -1;
                             }
                         }
                     }
                     break;

                 case OP_POSIXCLASS:
                     {
                         // Out of input?
                         if (search.isEnd(idx))
                         {
                             return -1;
                         }

                         // opdata selects the class; every case tests the single character
                         // at idx and returns -1 when it does not belong to the class.
                         switch (opdata)
                         {
                             case POSIX_CLASS_ALNUM:
                                 if (!Character.isLetterOrDigit(search.charAt(idx)))
                                 {
                                     return -1;
                                 }
                                 break;

                             case POSIX_CLASS_ALPHA:
                                 if (!Character.isLetter(search.charAt(idx)))
                                 {
                                     return -1;
                                 }
                                 break;

                             case POSIX_CLASS_DIGIT:
                                 if (!Character.isDigit(search.charAt(idx)))
                                 {
                                     return -1;
                                 }
                                 break;

                             case POSIX_CLASS_BLANK: // JWL - bugbug: is this right??
                                 if (!Character.isSpaceChar(search.charAt(idx)))
                                 {
                                     return -1;
                                 }
                                 break;

                             case POSIX_CLASS_SPACE:
                                 if (!Character.isWhitespace(search.charAt(idx)))
                                 {
                                     return -1;
                                 }
                                 break;

                             case POSIX_CLASS_CNTRL:
                                 if (Character.getType(search.charAt(idx)) != Character.CONTROL)
                                 {
                                     return -1;
                                 }
                                 break;

                             case POSIX_CLASS_GRAPH: // JWL - bugbug???
                                 // As written, only the four Unicode symbol categories are
                                 // accepted here.
                                 switch (Character.getType(search.charAt(idx)))
                                 {
                                     case Character.MATH_SYMBOL:
                                     case Character.CURRENCY_SYMBOL:
                                     case Character.MODIFIER_SYMBOL:
                                     case Character.OTHER_SYMBOL:
                                         break;

                                     default:
                                         return -1;
                                 }
                                 break;

                             case POSIX_CLASS_LOWER:
                                 if (Character.getType(search.charAt(idx)) != Character.LOWERCASE_LETTER)
                                 {
                                     return -1;
                                 }
                                 break;

                             case POSIX_CLASS_UPPER:
                                 if (Character.getType(search.charAt(idx)) != Character.UPPERCASE_LETTER)
                                 {
                                     return -1;
                                 }
                                 break;

                             case POSIX_CLASS_PRINT:
                                 // Anything that is not a control character is accepted
                                 if (Character.getType(search.charAt(idx)) == Character.CONTROL)
                                 {
                                     return -1;
                                 }
                                 break;

                             case POSIX_CLASS_PUNCT:
                             {
                                 int type = Character.getType(search.charAt(idx));
                                 switch(type)
                                 {
                                     case Character.DASH_PUNCTUATION:
                                     case Character.START_PUNCTUATION:
                                     case Character.END_PUNCTUATION:
                                     case Character.CONNECTOR_PUNCTUATION:
                                     case Character.OTHER_PUNCTUATION:
                                         break;

                                     default:
                                         return -1;
                                 }
                             }
                             break;

                             case POSIX_CLASS_XDIGIT: // JWL - bugbug??
                             {
                                 // ASCII hexadecimal digits only: 0-9, a-f, A-F
                                 boolean isXDigit = ((search.charAt(idx) >= '0' && search.charAt(idx) <= '9') ||
                                                     (search.charAt(idx) >= 'a' && search.charAt(idx) <= 'f') ||
                                                     (search.charAt(idx) >= 'A' && search.charAt(idx) <= 'F'));
                                 if (!isXDigit)
                                 {
                                     return -1;
                                 }
                             }
                             break;

                             case POSIX_CLASS_JSTART:
                                 if (!Character.isJavaIdentifierStart(search.charAt(idx)))
                                 {
                                     return -1;
                                 }
                                 break;

                             case POSIX_CLASS_JPART:
                                 if (!Character.isJavaIdentifierPart(search.charAt(idx)))
                                 {
                                     return -1;
                                 }
                                 break;

                             default:
                                 internalError("Bad posix class");
                                 break;
                         }

                         // Matched.
                         idx++;
                     }
                     break;

                 case OP_ANYOF:
                     {
                         // Out of input?
                         if (search.isEnd(idx))
                         {
                             return -1;
                         }

                         // Get character to match against character class and maybe casefold
                         char c = search.charAt(idx);
                         boolean caseFold = (matchFlags & MATCH_CASEINDEPENDENT) != 0;
                         // Loop through character class checking our match character
                         // opdata is the number of ranges; each range is a (start, end) pair
                         // of chars stored right after this node's header.
                         int idxRange = node + nodeSize;
                         int idxEnd = idxRange + (opdata * 2);
                         boolean match = false;
                         for (int i = idxRange; !match && i < idxEnd; )
                         {
                             // Get start, end and match characters
                             char s = instruction[i++];
                             char e = instruction[i++];

                             // Inclusive range test: s <= c <= e
                             match = ((compareChars(c, s, caseFold) >= 0)
                                      && (compareChars(c, e, caseFold) <= 0));
                         }

                         // Fail if we didn't match the character class
                         if (!match)
                         {
                             return -1;
                         }
                         idx++;
                     }
                     break;

                 case OP_BRANCH:
                 {
                     // Check for choices
                     if (instruction[next + offsetOpcode] != OP_BRANCH)
                     {
                         // If there aren't any other choices, just evaluate this branch.
                         // (step into the branch body within this same loop; no recursion)
                         node += nodeSize;
                         continue;
                     }

                     // Try all available branches
                     short nextBranch;
                     do
                     {
                         // Try matching the branch against the string
                         // (the recursion runs to maxNode, so a success here means that
                         // everything after the alternation matched as well)
                         if ((idxNew = matchNodes(node + nodeSize, maxNode, idx)) != -1)
                         {
                             return idxNew;
                         }

                         // Go to next branch (if any)
                         nextBranch = (short)instruction[node + offsetNext];
                         node += nextBranch;
                     }
                     // Stop on a zero offset or when the node reached is not a branch
                     while (nextBranch != 0 && (instruction[node + offsetOpcode] == OP_BRANCH));

                     // Failed to match any branch!
                     return -1;
                 }

                 case OP_NOTHING:
                 case OP_GOTO:

                     // Just advance to the next node without doing anything
                     break;

                 case OP_END:

                     // Match has succeeded!
                     // Record the end of group 0 and report the final input index.
                     setParenEnd(0, idx);
                     return idx;

                 default:

                     // Corrupt program
                     internalError("Invalid opcode '" + opcode + "'");
             }

             // Advance to the next node in the program
             node = next;
         }

         // We "should" never end up here
         // NOTE(review): internalError is defined outside this chunk; the return below
         // only matters if that call returns normally.
         internalError("Corrupt program");
         return -1;
     }

     /**
      * Attempts to match the compiled program with the match anchored at one
      * specific index of the current input.  The cached sub-expression
      * positions are reset first, so whatever an earlier attempt recorded is
      * discarded.  This method is only meant for internal use.
      *
      * @param i The input string index to start matching at
      * @return True if the input matched the expression
      */
     protected boolean matchAt(int i)
     {
         // Discard the positions recorded for the first three groups and drop
         // the overflow arrays used for the remaining ones
         start0 = end0 = -1;
         start1 = end1 = -1;
         start2 = end2 = -1;
         startn = endn = null;

         // Group 0 (the whole match) is always counted and starts at i.
         // parenCount is assigned before setParenStart is called, as before.
         parenCount = 1;
         setParenStart(0, i);

         // Back-reference bookkeeping is only needed when the program says
         // that it contains back references.  Fresh int arrays start zero-filled.
         final boolean hasBackrefs = (program.flags & REProgram.OPT_HASBACKREFS) != 0;
         if (hasBackrefs)
         {
             startBackref = new int[maxParen];
             endBackref = new int[maxParen];
         }

         // Run the whole program from node 0
         final int matchEnd = matchNodes(0, maxNode, i);
         if (matchEnd == -1)
         {
             // No match here: report zero valid groups
             parenCount = 0;
             return false;
         }

         setParenEnd(0, matchEnd);
         return true;
     }

     /**
      * Matches the current regular expression program against a String,
      * beginning the search at the given index.  The String is wrapped in a
      * StringCharacterIterator and the work is delegated to the
      * iterator-based overload.
      *
      * @param search String to match against
      * @param i Index to start searching at
      * @return True if string matched
      */
     public boolean match(String search, int i)
     {
         StringCharacterIterator input = new StringCharacterIterator(search);
         return match(input, i);
     }

     /**
      * Matches the current regular expression program against the characters
      * supplied by a CharacterIterator, starting at a given index.
      *
      * <p>Successive start positions are tried, beginning with <code>i</code>,
      * until <code>matchAt</code> succeeds or the input is exhausted.  When the
      * compiled program carries a literal prefix, a position is only handed to
      * <code>matchAt</code> if the input at that position starts with the
      * complete prefix.
      *
      * <p>The iterator is stored in this object and is used by the matching
      * methods.
      *
      * @param search Character source to match against
      * @param i Index to start searching at
      * @return True if the program matched at some position at or after i
      */
     public boolean match(CharacterIterator search, int i)
     {
         // There is no compiled program to search with!
         if (program == null)
         {
             // This should be uncommon enough to be an error case rather
             // than an exception (which would have to be handled everywhere)
             internalError("No RE program to run!");
         }

         // Save string to search
         this.search = search;

         // Can we optimize the search by looking for a prefix string?
         if (program.prefix == null)
         {
             // Unprefixed matching must try for a match at each character.
             // The loop tests i - 1, so the last position tried is the first one
             // for which isEnd(i - 1) holds (presumably the index just past the
             // final character, which lets an empty match succeed there).
             for ( ; !search.isEnd(i - 1); i++)
             {
                 // Try a match at index i
                 if (matchAt(i))
                 {
                     return true;
                 }
             }
             return false;
         }
         else
         {
             // Prefix-anchored matching is possible
             boolean caseIndependent = (matchFlags & MATCH_CASEINDEPENDENT) != 0;
             char[] prefix = program.prefix;

             // Only positions with room for the whole prefix are considered
             for ( ; !search.isEnd(i + prefix.length - 1); i++)
             {
                 int j = i;
                 int k = 0;

                 boolean prefixMatched;
                 do {
                     // If there's a mismatch of any character in the prefix, give up
                     prefixMatched = (compareChars(search.charAt(j++), prefix[k++], caseIndependent) == 0);
                 } while (prefixMatched && k < prefix.length);

                 // The loop ends with prefixMatched still true only when every prefix
                 // character compared equal.  Testing k == prefix.length instead would
                 // also accept a mismatch on the final prefix character, because k has
                 // already been incremented past it by then.
                 if (prefixMatched && matchAt(i))
                 {
                     return true;
                 }
             }
             return false;
         }
     }

     /**
      * Matches the current regular expression program against a String,
      * searching from the first character.
      *
      * @param search String to match against
      * @return True if string matched
      */
     public boolean match(String search)
     {
         // Searching from index 0 considers every position of the input
         final int fromStart = 0;
         return match(search, fromStart);
     }

     /**
      * Splits a string into an array of strings on regular expression boundaries.
      * This function works the same way as the Perl function of the same name.
      * Given a regular expression of "[ab]+" and a string to split of
      * "xyzzyababbayyzabbbab123", the result would be the array of Strings
      * "[xyzzy, yyz, 123]".
      *
      * <p>Please note that the first string in the resulting array may be an empty
      * string. This happens when the very first character of input string is
      * matched by the pattern.
      *
      * <p>A match that consumes no input at the current search position does not
      * stall the split: the single character at that position is emitted as a
      * piece of its own and the search resumes after it.  A trailing remainder is
      * only included when it is not empty.
      *
      * @param s String to split on this regular expression
      * @return Array of strings
      */
     public String[] split(String s)
     {
         // Pieces collected so far, in input order
         Vector pieces = new Vector();

         final int length = s.length();
         int cursor = 0;

         // Search from the cursor until the input is used up or nothing more matches
         while (cursor < length)
         {
             if (!match(s, cursor))
             {
                 break;
             }

             // Boundaries of the separator that was just found
             final int matchStart = getParenStart(0);
             final int matchEnd = getParenEnd(0);

             if (matchEnd != cursor)
             {
                 // Normal case: the text in front of the separator is the next piece
                 pieces.addElement(s.substring(cursor, matchStart));
                 cursor = matchEnd;
             }
             else
             {
                 // The match made no progress: emit one character and step past it
                 pieces.addElement(s.substring(cursor, matchStart + 1));
                 cursor = matchEnd + 1;
             }
         }

         // Whatever follows the last separator is the final piece, unless empty
         String tail = s.substring(cursor);
         if (tail.length() != 0)
         {
             pieces.addElement(tail);
         }

         // Hand the pieces back as an array
         String[] result = new String[pieces.size()];
         pieces.copyInto(result);
         return result;
     }

     /**
      * Flag value that indicates that subst should replace all occurrences of this
      * regular expression.  The value is zero, so it denotes the absence of
      * {@link #REPLACE_FIRSTONLY} rather than a bit that can be tested on its own;
      * combining it with other flags does not change them.
      */
     public static final int REPLACE_ALL            = 0x0000;

     /**
      * Flag bit that indicates that subst should only replace the first occurrence
      * of this regular expression.
      */
     public static final int REPLACE_FIRSTONLY      = 0x0001;

     /**
      * Flag bit that indicates that subst should replace backreferences
      * ($0 through $9) found in the substitution string with the text of the
      * corresponding parenthesized expression.
      */
     public static final int REPLACE_BACKREFERENCES = 0x0002;

     /**
      * Substitutes a string for this regular expression in another string,
      * replacing every occurrence.
      * This method works like the Perl function of the same name.
      * Given a regular expression of "a*b", a String to substituteIn of
      * "aaaabfooaaabgarplyaaabwackyb" and the substitution String "-", the
      * resulting String returned by subst would be "-foo-garply-wacky-".
      *
      * <p>This is a shorthand for the three-argument overload called with
      * REPLACE_ALL.
      *
      * @param substituteIn String to substitute within
      * @param substitution String to substitute for all matches of this regular expression.
      * @return The string substituteIn with zero or more occurrences of the current
      * regular expression replaced with the substitution String (if this regular
      * expression object doesn't match at any position, the original String is returned
      * unchanged).
      */
     public String subst(String substituteIn, String substitution)
     {
         final int flags = REPLACE_ALL;
         return subst(substituteIn, substitution, flags);
     }

     /**
      * Substitutes a string for this regular expression in another string.
      * This method works like the Perl function of the same name.
      * Given a regular expression of "a*b", a String to substituteIn of
      * "aaaabfooaaabgarplyaaabwackyb" and the substitution String "-", the
      * resulting String returned by subst would be "-foo-garply-wacky-".
      * <p>
      * It is also possible to reference the contents of a parenthesized expression
      * with $0, $1, ... $9. A regular expression of "http://[\\.\\w\\-\\?/~_@&=%]+",
      * a String to substituteIn of "visit us: http://www.apache.org!" and the
      * substitution String "&lt;a href=\"$0\"&gt;$0&lt;/a&gt;", the resulting String
      * returned by subst would be
      * "visit us: &lt;a href=\"http://www.apache.org\"&gt;http://www.apache.org&lt;/a&gt;!".
      * <p>
      * <i>Note:</i> $0 represents the whole match.  A '$' that is directly preceded
      * by a backslash is not treated as a backreference; it is copied to the result
      * together with the backslash.  If the referenced parenthesized expression is
      * not available, the text "null" is inserted.
      * <p>
      * When the expression matches without consuming any input at the current
      * position, the substitution is inserted and the input character at that
      * position is copied to the result unchanged before the search continues
      * after it, so no input is lost.
      *
      * @param substituteIn String to substitute within
      * @param substitution String to substitute for matches of this regular expression
      * @param flags One or more bitwise flags from REPLACE_*.  If the REPLACE_FIRSTONLY
      * flag bit is set, only the first occurrence of this regular expression is replaced.
      * If the bit is not set (REPLACE_ALL), all occurrences of this pattern will be
      * replaced. If the flag REPLACE_BACKREFERENCES is set, all backreferences will
      * be processed.
      * @return The string substituteIn with zero or more occurrences of the current
      * regular expression replaced with the substitution String (if this regular
      * expression object doesn't match at any position, the original String is returned
      * unchanged).
      */
     public String subst(String substituteIn, String substitution, int flags)
     {
         // String to return
         StringBuffer ret = new StringBuffer();

         // Start at position 0 and search the whole string
         int pos = 0;
         int len = substituteIn.length();

         // Try a match at each position
         while (pos < len && match(substituteIn, pos))
         {
             // Append string before match
             ret.append(substituteIn.substring(pos, getParenStart(0)));

             if ((flags & REPLACE_BACKREFERENCES) != 0)
             {
                 // Process backreferences.  lLastPosition is the index of the most
                 // recently expanded '$'; it starts at -2 so that lLastPosition + 2
                 // is 0, the start of the substitution, before any has been expanded.
                 int lCurrentPosition = 0;
                 int lLastPosition = -2;
                 int lLength = substitution.length();

                 while ((lCurrentPosition = substitution.indexOf("$", lCurrentPosition)) >= 0)
                 {
                     // Only an unescaped '$' that is followed by another character
                     // can introduce a backreference
                     if ((lCurrentPosition == 0 || substitution.charAt(lCurrentPosition - 1) != '\\')
                         && lCurrentPosition + 1 < lLength)
                     {
                         char c = substitution.charAt(lCurrentPosition + 1);
                         if (c >= '0' && c <= '9')
                         {
                             // Append the literal text between the previous backreference
                             // (or the start of the substitution) and this $ sign
                             ret.append(substitution.substring(lLastPosition + 2, lCurrentPosition));

                             // Append the parenthesized expression
                             // Note: if a parenthesized expression of the requested
                             // index is not available "null" is added to the string
                             ret.append(getParen(c - '0'));
                             lLastPosition = lCurrentPosition;
                         }
                     }

                     // Move forward, skipping past match
                     lCurrentPosition++;
                 }

                 // Append everything after the last $ sign
                 ret.append(substitution.substring(lLastPosition + 2, lLength));
             }
             else
             {
                 // Append substitution without processing backreferences
                 ret.append(substitution);
             }

             // Move forward, skipping past match
             int newpos = getParenEnd(0);

             // We always want to make progress!
             if (newpos == pos)
             {
                 // The match consumed nothing, so the character being stepped over is
                 // not part of it: copy it to the result instead of dropping it.
                 // pos < len is guaranteed by the loop condition.
                 ret.append(substituteIn.charAt(pos));
                 newpos++;
             }

             // Try new position
             pos = newpos;

             // Break out if we're only supposed to replace one occurrence
             if ((flags & REPLACE_FIRSTONLY) != 0)
             {
                 break;
             }
         }

         // If there's remaining input, append it
         if (pos < len)
         {
             ret.append(substituteIn.substring(pos));
         }

         // Return string buffer as string
         return ret.toString();
     }

     /**
      * Returns an array of Strings holding the toString representation of every
      * object in the given array that matches this regular expression. This method
      * works like the Perl function of the same name.  Given a regular expression
      * of "a*b" and an array of String objects of [foo, aab, zzz, aaaab], the array
      * of Strings returned by grep would be [aab, aaaab].
      *
      * @param search Array of Objects to search
      * @return Array of Strings whose toString() value matches this regular expression.
      */
     public String[] grep(Object[] search)
     {
         // Collects the string form of every element accepted by match()
         Vector matches = new Vector();

         for (Object element : search)
         {
             // Each element is tested through its toString() representation
             String candidate = element.toString();

             // Skip anything this regexp does not match
             if (!match(candidate))
             {
                 continue;
             }

             matches.addElement(candidate);
         }

         // Copy the collected strings into a correctly sized array
         String[] result = new String[matches.size()];
         matches.copyInto(result);
         return result;
     }

     /**
      * @return true if character at i-th position in the <code>search</code> string is a newline
      */
     private boolean isNewline(int i)
     {
         // Look at the character at position i of the current search input
         switch (search.charAt(i))
         {
             case '\n':      // line feed
             case '\r':      // carriage return
             case '\u0085':  // next line (NEL)
             case '\u2028':  // line separator
             case '\u2029':  // paragraph separator
                 return true;

             default:
                 return false;
         }
     }

     /**
      * Compares two characters.
      *
      * @param c1 first character to compare.
      * @param c2 second character to compare.
      * @param caseIndependent whether comparison is case insensitive or not.
      * @return negative, 0, or positive integer as the first character is
      *         less than, equal to, or greater than the second.
      */
     private int compareChars(char c1, char c2, boolean caseIndependent)
     {
         // Case-sensitive comparison: the difference of the raw char values
         if (!caseIndependent)
         {
             return c1 - c2;
         }

         // Case-insensitive comparison: fold both sides to lower case first
         final char left = Character.toLowerCase(c1);
         final char right = Character.toLowerCase(c2);
         return left - right;
     }
 }