| /* |
| * reserved comment block |
| * DO NOT REMOVE OR ALTER! |
| */ |
| /* |
| * Copyright 1999-2004 The Apache Software Foundation. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package com.sun.org.apache.regexp.internal; |
| |
| import java.io.Serializable; |
| |
| /** |
| * A class that holds compiled regular expressions. This is exposed mainly |
| * for use by the recompile utility (which helps you produce precompiled |
| * REProgram objects). You should not otherwise need to work directly with |
| * this class. |
| * |
| * @see RE |
| * @see RECompiler |
| * |
| * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a> |
| */ |
| public class REProgram implements Serializable |
| { |
| static final int OPT_HASBACKREFS = 1; |
| |
| char[] instruction; // The compiled regular expression 'program' |
| int lenInstruction; // The amount of the instruction buffer in use |
| char[] prefix; // Prefix string optimization |
| int flags; // Optimization flags (REProgram.OPT_*) |
| int maxParens = -1; |
| |
| /** |
| * Constructs a program object from a character array |
| * @param instruction Character array with RE opcode instructions in it |
| */ |
| public REProgram(char[] instruction) |
| { |
| this(instruction, instruction.length); |
| } |
| |
| /** |
| * Constructs a program object from a character array |
| * @param parens Count of parens in the program |
| * @param instruction Character array with RE opcode instructions in it |
| */ |
| public REProgram(int parens, char[] instruction) |
| { |
| this(instruction, instruction.length); |
| this.maxParens = parens; |
| } |
| |
| /** |
| * Constructs a program object from a character array |
| * @param instruction Character array with RE opcode instructions in it |
| * @param lenInstruction Amount of instruction array in use |
| */ |
| public REProgram(char[] instruction, int lenInstruction) |
| { |
| setInstructions(instruction, lenInstruction); |
| } |
| |
| /** |
| * Returns a copy of the current regular expression program in a character |
| * array that is exactly the right length to hold the program. If there is |
| * no program compiled yet, getInstructions() will return null. |
| * @return A copy of the current compiled RE program |
| */ |
| public char[] getInstructions() |
| { |
| // Ensure program has been compiled! |
| if (lenInstruction != 0) |
| { |
| // Return copy of program |
| char[] ret = new char[lenInstruction]; |
| System.arraycopy(instruction, 0, ret, 0, lenInstruction); |
| return ret; |
| } |
| return null; |
| } |
| |
| /** |
| * Sets a new regular expression program to run. It is this method which |
| * performs any special compile-time search optimizations. Currently only |
| * two optimizations are in place - one which checks for backreferences |
| * (so that they can be lazily allocated) and another which attempts to |
| * find an prefix anchor string so that substantial amounts of input can |
| * potentially be skipped without running the actual program. |
| * @param instruction Program instruction buffer |
| * @param lenInstruction Length of instruction buffer in use |
| */ |
| public void setInstructions(char[] instruction, int lenInstruction) |
| { |
| // Save reference to instruction array |
| this.instruction = instruction; |
| this.lenInstruction = lenInstruction; |
| |
| // Initialize other program-related variables |
| flags = 0; |
| prefix = null; |
| |
| // Try various compile-time optimizations if there's a program |
| if (instruction != null && lenInstruction != 0) |
| { |
| // If the first node is a branch |
| if (lenInstruction >= RE.nodeSize && instruction[0 + RE.offsetOpcode] == RE.OP_BRANCH) |
| { |
| // to the end node |
| int next = instruction[0 + RE.offsetNext]; |
| if (instruction[next + RE.offsetOpcode] == RE.OP_END) |
| { |
| // and the branch starts with an atom |
| if (lenInstruction >= (RE.nodeSize * 2) && instruction[RE.nodeSize + RE.offsetOpcode] == RE.OP_ATOM) |
| { |
| // then get that atom as an prefix because there's no other choice |
| int lenAtom = instruction[RE.nodeSize + RE.offsetOpdata]; |
| prefix = new char[lenAtom]; |
| System.arraycopy(instruction, RE.nodeSize * 2, prefix, 0, lenAtom); |
| } |
| } |
| } |
| |
| BackrefScanLoop: |
| |
| // Check for backreferences |
| for (int i = 0; i < lenInstruction; i += RE.nodeSize) |
| { |
| switch (instruction[i + RE.offsetOpcode]) |
| { |
| case RE.OP_ANYOF: |
| i += (instruction[i + RE.offsetOpdata] * 2); |
| break; |
| |
| case RE.OP_ATOM: |
| i += instruction[i + RE.offsetOpdata]; |
| break; |
| |
| case RE.OP_BACKREF: |
| flags |= OPT_HASBACKREFS; |
| break BackrefScanLoop; |
| } |
| } |
| } |
| } |
| } |