| /* |
| * [The "BSD license"] |
| * Copyright (c) 2010 Terence Parr |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * 3. The name of the author may not be used to endorse or promote products |
| * derived from this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
| * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
| * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
| * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| package org.antlr.misc; |
| |
| import org.antlr.analysis.Label; |
| import org.antlr.tool.Grammar; |
| |
| import java.util.ArrayList; |
| import java.util.Iterator; |
| import java.util.List; |
| import java.util.ListIterator; |
| |
| /** A set of integers that relies on ranges being common to do |
| * "run-length-encoded" like compression (if you view an IntSet like |
| * a BitSet with runs of 0s and 1s). Only ranges are recorded so that |
| * a few ints up near value 1000 don't cause massive bitsets, just two |
| * integer intervals. |
| * |
| * element values may be negative. Useful for sets of EPSILON and EOF. |
| * |
| * 0..9 char range is index pair ['\u0030','\u0039']. |
| * Multiple ranges are encoded with multiple index pairs. Isolated |
| * elements are encoded with an index pair where both intervals are the same. |
| * |
| * The ranges are ordered and disjoint so that 2..6 appears before 101..103. |
| */ |
| public class IntervalSet implements IntSet { |
| public static final IntervalSet COMPLETE_SET = IntervalSet.of(0,Label.MAX_CHAR_VALUE); |
| |
| /** The list of sorted, disjoint intervals. */ |
| protected List<Interval> intervals; |
| |
| /** Create a set with no elements */ |
| public IntervalSet() { |
| intervals = new ArrayList<Interval>(2); // most sets are 1 or 2 elements |
| } |
| |
| public IntervalSet(List<Interval> intervals) { |
| this.intervals = intervals; |
| } |
| |
| /** Create a set with a single element, el. */ |
| public static IntervalSet of(int a) { |
| IntervalSet s = new IntervalSet(); |
| s.add(a); |
| return s; |
| } |
| |
| /** Create a set with all ints within range [a..b] (inclusive) */ |
| public static IntervalSet of(int a, int b) { |
| IntervalSet s = new IntervalSet(); |
| s.add(a,b); |
| return s; |
| } |
| |
| /** Add a single element to the set. An isolated element is stored |
| * as a range el..el. |
| */ |
| public void add(int el) { |
| add(el,el); |
| } |
| |
| /** Add interval; i.e., add all integers from a to b to set. |
| * If b<a, do nothing. |
| * Keep list in sorted order (by left range value). |
| * If overlap, combine ranges. For example, |
| * If this is {1..5, 10..20}, adding 6..7 yields |
| * {1..5, 6..7, 10..20}. Adding 4..8 yields {1..8, 10..20}. |
| */ |
| public void add(int a, int b) { |
| add(Interval.create(a,b)); |
| } |
| |
| // copy on write so we can cache a..a intervals and sets of that |
| protected void add(Interval addition) { |
| //System.out.println("add "+addition+" to "+intervals.toString()); |
| if ( addition.b<addition.a ) { |
| return; |
| } |
| // find position in list |
| // Use iterators as we modify list in place |
| for (ListIterator iter = intervals.listIterator(); iter.hasNext();) { |
| Interval r = (Interval) iter.next(); |
| if ( addition.equals(r) ) { |
| return; |
| } |
| if ( addition.adjacent(r) || !addition.disjoint(r) ) { |
| // next to each other, make a single larger interval |
| Interval bigger = addition.union(r); |
| iter.set(bigger); |
| // make sure we didn't just create an interval that |
| // should be merged with next interval in list |
| if ( iter.hasNext() ) { |
| Interval next = (Interval) iter.next(); |
| if ( bigger.adjacent(next)||!bigger.disjoint(next) ) { |
| // if we bump up against or overlap next, merge |
| iter.remove(); // remove this one |
| iter.previous(); // move backwards to what we just set |
| iter.set(bigger.union(next)); // set to 3 merged ones |
| } |
| } |
| return; |
| } |
| if ( addition.startsBeforeDisjoint(r) ) { |
| // insert before r |
| iter.previous(); |
| iter.add(addition); |
| return; |
| } |
| // if disjoint and after r, a future iteration will handle it |
| } |
| // ok, must be after last interval (and disjoint from last interval) |
| // just add it |
| intervals.add(addition); |
| } |
| |
| /* |
| protected void add(Interval addition) { |
| //System.out.println("add "+addition+" to "+intervals.toString()); |
| if ( addition.b<addition.a ) { |
| return; |
| } |
| // find position in list |
| //for (ListIterator iter = intervals.listIterator(); iter.hasNext();) { |
| int n = intervals.size(); |
| for (int i=0; i<n; i++) { |
| Interval r = (Interval)intervals.get(i); |
| if ( addition.equals(r) ) { |
| return; |
| } |
| if ( addition.adjacent(r) || !addition.disjoint(r) ) { |
| // next to each other, make a single larger interval |
| Interval bigger = addition.union(r); |
| intervals.set(i, bigger); |
| // make sure we didn't just create an interval that |
| // should be merged with next interval in list |
| if ( (i+1)<n ) { |
| i++; |
| Interval next = (Interval)intervals.get(i); |
| if ( bigger.adjacent(next)||!bigger.disjoint(next) ) { |
| // if we bump up against or overlap next, merge |
| intervals.remove(i); // remove next one |
| i--; |
| intervals.set(i, bigger.union(next)); // set to 3 merged ones |
| } |
| } |
| return; |
| } |
| if ( addition.startsBeforeDisjoint(r) ) { |
| // insert before r |
| intervals.add(i, addition); |
| return; |
| } |
| // if disjoint and after r, a future iteration will handle it |
| } |
| // ok, must be after last interval (and disjoint from last interval) |
| // just add it |
| intervals.add(addition); |
| } |
| */ |
| |
| public void addAll(IntSet set) { |
| if ( set==null ) { |
| return; |
| } |
| if ( !(set instanceof IntervalSet) ) { |
| throw new IllegalArgumentException("can't add non IntSet ("+ |
| set.getClass().getName()+ |
| ") to IntervalSet"); |
| } |
| IntervalSet other = (IntervalSet)set; |
| // walk set and add each interval |
| int n = other.intervals.size(); |
| for (int i = 0; i < n; i++) { |
| Interval I = (Interval) other.intervals.get(i); |
| this.add(I.a,I.b); |
| } |
| } |
| |
| public IntSet complement(int minElement, int maxElement) { |
| return this.complement(IntervalSet.of(minElement,maxElement)); |
| } |
| |
| /** Given the set of possible values (rather than, say UNICODE or MAXINT), |
| * return a new set containing all elements in vocabulary, but not in |
| * this. The computation is (vocabulary - this). |
| * |
| * 'this' is assumed to be either a subset or equal to vocabulary. |
| */ |
| public IntSet complement(IntSet vocabulary) { |
| if ( vocabulary==null ) { |
| return null; // nothing in common with null set |
| } |
| if ( !(vocabulary instanceof IntervalSet ) ) { |
| throw new IllegalArgumentException("can't complement with non IntervalSet ("+ |
| vocabulary.getClass().getName()+")"); |
| } |
| IntervalSet vocabularyIS = ((IntervalSet)vocabulary); |
| int maxElement = vocabularyIS.getMaxElement(); |
| |
| IntervalSet compl = new IntervalSet(); |
| int n = intervals.size(); |
| if ( n ==0 ) { |
| return compl; |
| } |
| Interval first = (Interval)intervals.get(0); |
| // add a range from 0 to first.a constrained to vocab |
| if ( first.a > 0 ) { |
| IntervalSet s = IntervalSet.of(0, first.a-1); |
| IntervalSet a = (IntervalSet)s.and(vocabularyIS); |
| compl.addAll(a); |
| } |
| for (int i=1; i<n; i++) { // from 2nd interval .. nth |
| Interval previous = (Interval)intervals.get(i-1); |
| Interval current = (Interval)intervals.get(i); |
| IntervalSet s = IntervalSet.of(previous.b+1, current.a-1); |
| IntervalSet a = (IntervalSet)s.and(vocabularyIS); |
| compl.addAll(a); |
| } |
| Interval last = (Interval)intervals.get(n -1); |
| // add a range from last.b to maxElement constrained to vocab |
| if ( last.b < maxElement ) { |
| IntervalSet s = IntervalSet.of(last.b+1, maxElement); |
| IntervalSet a = (IntervalSet)s.and(vocabularyIS); |
| compl.addAll(a); |
| } |
| return compl; |
| } |
| |
| /** Compute this-other via this&~other. |
| * Return a new set containing all elements in this but not in other. |
| * other is assumed to be a subset of this; |
| * anything that is in other but not in this will be ignored. |
| */ |
| public IntSet subtract(IntSet other) { |
| // assume the whole unicode range here for the complement |
| // because it doesn't matter. Anything beyond the max of this' set |
| // will be ignored since we are doing this & ~other. The intersection |
| // will be empty. The only problem would be when this' set max value |
| // goes beyond MAX_CHAR_VALUE, but hopefully the constant MAX_CHAR_VALUE |
| // will prevent this. |
| return this.and(((IntervalSet)other).complement(COMPLETE_SET)); |
| } |
| |
| /** return a new set containing all elements in this but not in other. |
| * Intervals may have to be broken up when ranges in this overlap |
| * with ranges in other. other is assumed to be a subset of this; |
| * anything that is in other but not in this will be ignored. |
| * |
| * Keep around, but 10-20-2005, I decided to make complement work w/o |
| * subtract and so then subtract can simply be a&~b |
| * |
| public IntSet subtract(IntSet other) { |
| if ( other==null || !(other instanceof IntervalSet) ) { |
| return null; // nothing in common with null set |
| } |
| |
| IntervalSet diff = new IntervalSet(); |
| |
| // iterate down both interval lists |
| ListIterator thisIter = this.intervals.listIterator(); |
| ListIterator otherIter = ((IntervalSet)other).intervals.listIterator(); |
| Interval mine=null; |
| Interval theirs=null; |
| if ( thisIter.hasNext() ) { |
| mine = (Interval)thisIter.next(); |
| } |
| if ( otherIter.hasNext() ) { |
| theirs = (Interval)otherIter.next(); |
| } |
| while ( mine!=null ) { |
| //System.out.println("mine="+mine+", theirs="+theirs); |
| // CASE 1: nothing in theirs removes a chunk from mine |
| if ( theirs==null || mine.disjoint(theirs) ) { |
| // SUBCASE 1a: finished traversing theirs; keep adding mine now |
| if ( theirs==null ) { |
| // add everything in mine to difference since theirs done |
| diff.add(mine); |
| mine = null; |
| if ( thisIter.hasNext() ) { |
| mine = (Interval)thisIter.next(); |
| } |
| } |
| else { |
| // SUBCASE 1b: mine is completely to the left of theirs |
| // so we can add to difference; move mine, but not theirs |
| if ( mine.startsBeforeDisjoint(theirs) ) { |
| diff.add(mine); |
| mine = null; |
| if ( thisIter.hasNext() ) { |
| mine = (Interval)thisIter.next(); |
| } |
| } |
| // SUBCASE 1c: theirs is completely to the left of mine |
| else { |
| // keep looking in theirs |
| theirs = null; |
| if ( otherIter.hasNext() ) { |
| theirs = (Interval)otherIter.next(); |
| } |
| } |
| } |
| } |
| else { |
| // CASE 2: theirs breaks mine into two chunks |
| if ( mine.properlyContains(theirs) ) { |
| // must add two intervals: stuff to left and stuff to right |
| diff.add(mine.a, theirs.a-1); |
| // don't actually add stuff to right yet as next 'theirs' |
| // might overlap with it |
| // The stuff to the right might overlap with next "theirs". |
| // so it is considered next |
| Interval right = new Interval(theirs.b+1, mine.b); |
| mine = right; |
| // move theirs forward |
| theirs = null; |
| if ( otherIter.hasNext() ) { |
| theirs = (Interval)otherIter.next(); |
| } |
| } |
| |
| // CASE 3: theirs covers mine; nothing to add to diff |
| else if ( theirs.properlyContains(mine) ) { |
| // nothing to add, theirs forces removal totally of mine |
| // just move mine looking for an overlapping interval |
| mine = null; |
| if ( thisIter.hasNext() ) { |
| mine = (Interval)thisIter.next(); |
| } |
| } |
| |
| // CASE 4: non proper overlap |
| else { |
| // overlap, but not properly contained |
| diff.add(mine.differenceNotProperlyContained(theirs)); |
| // update iterators |
| boolean moveTheirs = true; |
| if ( mine.startsBeforeNonDisjoint(theirs) || |
| theirs.b > mine.b ) |
| { |
| // uh oh, right of theirs extends past right of mine |
| // therefore could overlap with next of mine so don't |
| // move theirs iterator yet |
| moveTheirs = false; |
| } |
| // always move mine |
| mine = null; |
| if ( thisIter.hasNext() ) { |
| mine = (Interval)thisIter.next(); |
| } |
| if ( moveTheirs ) { |
| theirs = null; |
| if ( otherIter.hasNext() ) { |
| theirs = (Interval)otherIter.next(); |
| } |
| } |
| } |
| } |
| } |
| return diff; |
| } |
| */ |
| |
| /** TODO: implement this! */ |
| public IntSet or(IntSet a) { |
| IntervalSet o = new IntervalSet(); |
| o.addAll(this); |
| o.addAll(a); |
| //throw new NoSuchMethodError(); |
| return o; |
| } |
| |
| /** Return a new set with the intersection of this set with other. Because |
| * the intervals are sorted, we can use an iterator for each list and |
| * just walk them together. This is roughly O(min(n,m)) for interval |
| * list lengths n and m. |
| */ |
| public IntSet and(IntSet other) { |
| if ( other==null ) { //|| !(other instanceof IntervalSet) ) { |
| return null; // nothing in common with null set |
| } |
| |
| ArrayList myIntervals = (ArrayList)this.intervals; |
| ArrayList theirIntervals = (ArrayList)((IntervalSet)other).intervals; |
| IntervalSet intersection = null; |
| int mySize = myIntervals.size(); |
| int theirSize = theirIntervals.size(); |
| int i = 0; |
| int j = 0; |
| // iterate down both interval lists looking for nondisjoint intervals |
| while ( i<mySize && j<theirSize ) { |
| Interval mine = (Interval)myIntervals.get(i); |
| Interval theirs = (Interval)theirIntervals.get(j); |
| //System.out.println("mine="+mine+" and theirs="+theirs); |
| if ( mine.startsBeforeDisjoint(theirs) ) { |
| // move this iterator looking for interval that might overlap |
| i++; |
| } |
| else if ( theirs.startsBeforeDisjoint(mine) ) { |
| // move other iterator looking for interval that might overlap |
| j++; |
| } |
| else if ( mine.properlyContains(theirs) ) { |
| // overlap, add intersection, get next theirs |
| if ( intersection==null ) { |
| intersection = new IntervalSet(); |
| } |
| intersection.add(mine.intersection(theirs)); |
| j++; |
| } |
| else if ( theirs.properlyContains(mine) ) { |
| // overlap, add intersection, get next mine |
| if ( intersection==null ) { |
| intersection = new IntervalSet(); |
| } |
| intersection.add(mine.intersection(theirs)); |
| i++; |
| } |
| else if ( !mine.disjoint(theirs) ) { |
| // overlap, add intersection |
| if ( intersection==null ) { |
| intersection = new IntervalSet(); |
| } |
| intersection.add(mine.intersection(theirs)); |
| // Move the iterator of lower range [a..b], but not |
| // the upper range as it may contain elements that will collide |
| // with the next iterator. So, if mine=[0..115] and |
| // theirs=[115..200], then intersection is 115 and move mine |
| // but not theirs as theirs may collide with the next range |
| // in thisIter. |
| // move both iterators to next ranges |
| if ( mine.startsAfterNonDisjoint(theirs) ) { |
| j++; |
| } |
| else if ( theirs.startsAfterNonDisjoint(mine) ) { |
| i++; |
| } |
| } |
| } |
| if ( intersection==null ) { |
| return new IntervalSet(); |
| } |
| return intersection; |
| } |
| |
| /** Is el in any range of this set? */ |
| public boolean member(int el) { |
| int n = intervals.size(); |
| for (int i = 0; i < n; i++) { |
| Interval I = (Interval) intervals.get(i); |
| int a = I.a; |
| int b = I.b; |
| if ( el<a ) { |
| break; // list is sorted and el is before this interval; not here |
| } |
| if ( el>=a && el<=b ) { |
| return true; // found in this interval |
| } |
| } |
| return false; |
| /* |
| for (ListIterator iter = intervals.listIterator(); iter.hasNext();) { |
| Interval I = (Interval) iter.next(); |
| if ( el<I.a ) { |
| break; // list is sorted and el is before this interval; not here |
| } |
| if ( el>=I.a && el<=I.b ) { |
| return true; // found in this interval |
| } |
| } |
| return false; |
| */ |
| } |
| |
| /** return true if this set has no members */ |
| public boolean isNil() { |
| return intervals==null || intervals.size()==0; |
| } |
| |
| /** If this set is a single integer, return it otherwise Label.INVALID */ |
| public int getSingleElement() { |
| if ( intervals!=null && intervals.size()==1 ) { |
| Interval I = (Interval)intervals.get(0); |
| if ( I.a == I.b ) { |
| return I.a; |
| } |
| } |
| return Label.INVALID; |
| } |
| |
| public int getMaxElement() { |
| if ( isNil() ) { |
| return Label.INVALID; |
| } |
| Interval last = (Interval)intervals.get(intervals.size()-1); |
| return last.b; |
| } |
| |
| /** Return minimum element >= 0 */ |
| public int getMinElement() { |
| if ( isNil() ) { |
| return Label.INVALID; |
| } |
| int n = intervals.size(); |
| for (int i = 0; i < n; i++) { |
| Interval I = (Interval) intervals.get(i); |
| int a = I.a; |
| int b = I.b; |
| for (int v=a; v<=b; v++) { |
| if ( v>=0 ) return v; |
| } |
| } |
| return Label.INVALID; |
| } |
| |
| /** Return a list of Interval objects. */ |
| public List<Interval> getIntervals() { |
| return intervals; |
| } |
| |
| /** Are two IntervalSets equal? Because all intervals are sorted |
| * and disjoint, equals is a simple linear walk over both lists |
| * to make sure they are the same. Interval.equals() is used |
| * by the List.equals() method to check the ranges. |
| */ |
| public boolean equals(Object obj) { |
| if ( obj==null || !(obj instanceof IntervalSet) ) { |
| return false; |
| } |
| IntervalSet other = (IntervalSet)obj; |
| return this.intervals.equals(other.intervals); |
| } |
| |
| public String toString() { |
| return toString(null); |
| } |
| |
| public String toString(Grammar g) { |
| StringBuffer buf = new StringBuffer(); |
| if ( this.intervals==null || this.intervals.size()==0 ) { |
| return "{}"; |
| } |
| if ( this.intervals.size()>1 ) { |
| buf.append("{"); |
| } |
| Iterator iter = this.intervals.iterator(); |
| while (iter.hasNext()) { |
| Interval I = (Interval) iter.next(); |
| int a = I.a; |
| int b = I.b; |
| if ( a==b ) { |
| if ( g!=null ) { |
| buf.append(g.getTokenDisplayName(a)); |
| } |
| else { |
| buf.append(a); |
| } |
| } |
| else { |
| if ( g!=null ) { |
| buf.append(g.getTokenDisplayName(a)+".."+g.getTokenDisplayName(b)); |
| } |
| else { |
| buf.append(a+".."+b); |
| } |
| } |
| if ( iter.hasNext() ) { |
| buf.append(", "); |
| } |
| } |
| if ( this.intervals.size()>1 ) { |
| buf.append("}"); |
| } |
| return buf.toString(); |
| } |
| |
| public int size() { |
| int n = 0; |
| int numIntervals = intervals.size(); |
| if ( numIntervals==1 ) { |
| Interval firstInterval = this.intervals.get(0); |
| return firstInterval.b-firstInterval.a+1; |
| } |
| for (int i = 0; i < numIntervals; i++) { |
| Interval I = (Interval) intervals.get(i); |
| n += (I.b-I.a+1); |
| } |
| return n; |
| } |
| |
| public List toList() { |
| List values = new ArrayList(); |
| int n = intervals.size(); |
| for (int i = 0; i < n; i++) { |
| Interval I = (Interval) intervals.get(i); |
| int a = I.a; |
| int b = I.b; |
| for (int v=a; v<=b; v++) { |
| values.add(Utils.integer(v)); |
| } |
| } |
| return values; |
| } |
| |
| /** Get the ith element of ordered set. Used only by RandomPhrase so |
| * don't bother to implement if you're not doing that for a new |
| * ANTLR code gen target. |
| */ |
| public int get(int i) { |
| int n = intervals.size(); |
| int index = 0; |
| for (int j = 0; j < n; j++) { |
| Interval I = (Interval) intervals.get(j); |
| int a = I.a; |
| int b = I.b; |
| for (int v=a; v<=b; v++) { |
| if ( index==i ) { |
| return v; |
| } |
| index++; |
| } |
| } |
| return -1; |
| } |
| |
| public int[] toArray() { |
| int[] values = new int[size()]; |
| int n = intervals.size(); |
| int j = 0; |
| for (int i = 0; i < n; i++) { |
| Interval I = (Interval) intervals.get(i); |
| int a = I.a; |
| int b = I.b; |
| for (int v=a; v<=b; v++) { |
| values[j] = v; |
| j++; |
| } |
| } |
| return values; |
| } |
| |
| public org.antlr.runtime.BitSet toRuntimeBitSet() { |
| org.antlr.runtime.BitSet s = |
| new org.antlr.runtime.BitSet(getMaxElement()+1); |
| int n = intervals.size(); |
| for (int i = 0; i < n; i++) { |
| Interval I = (Interval) intervals.get(i); |
| int a = I.a; |
| int b = I.b; |
| for (int v=a; v<=b; v++) { |
| s.add(v); |
| } |
| } |
| return s; |
| } |
| |
| public void remove(int el) { |
| throw new NoSuchMethodError("IntervalSet.remove() unimplemented"); |
| } |
| |
| /* |
| protected void finalize() throws Throwable { |
| super.finalize(); |
| System.out.println("size "+intervals.size()+" "+size()); |
| } |
| */ |
| } |