| """ |
| An OrderedSet is a custom MutableSet that remembers its order, so that every |
| entry has an index that can be looked up. |
| |
| Based on a recipe originally posted to ActiveState Recipes by Raymond Hettiger, |
| and released under the MIT license. |
| """ |
| import itertools as it |
| from collections import deque |
| |
| try: |
| # Python 3 |
| from collections.abc import MutableSet, Sequence |
| except ImportError: |
| # Python 2.7 |
| from collections import MutableSet, Sequence |
| |
| SLICE_ALL = slice(None) |
| __version__ = "3.1" |
| |
| |
| def is_iterable(obj): |
| """ |
| Are we being asked to look up a list of things, instead of a single thing? |
| We check for the `__iter__` attribute so that this can cover types that |
| don't have to be known by this module, such as NumPy arrays. |
| |
| Strings, however, should be considered as atomic values to look up, not |
| iterables. The same goes for tuples, since they are immutable and therefore |
| valid entries. |
| |
| We don't need to check for the Python 2 `unicode` type, because it doesn't |
| have an `__iter__` attribute anyway. |
| """ |
| return ( |
| hasattr(obj, "__iter__") |
| and not isinstance(obj, str) |
| and not isinstance(obj, tuple) |
| ) |
| |
| |
| class OrderedSet(MutableSet, Sequence): |
| """ |
| An OrderedSet is a custom MutableSet that remembers its order, so that |
| every entry has an index that can be looked up. |
| |
| Example: |
| >>> OrderedSet([1, 1, 2, 3, 2]) |
| OrderedSet([1, 2, 3]) |
| """ |
| |
| def __init__(self, iterable=None): |
| self.items = [] |
| self.map = {} |
| if iterable is not None: |
| self |= iterable |
| |
| def __len__(self): |
| """ |
| Returns the number of unique elements in the ordered set |
| |
| Example: |
| >>> len(OrderedSet([])) |
| 0 |
| >>> len(OrderedSet([1, 2])) |
| 2 |
| """ |
| return len(self.items) |
| |
| def __getitem__(self, index): |
| """ |
| Get the item at a given index. |
| |
| If `index` is a slice, you will get back that slice of items, as a |
| new OrderedSet. |
| |
| If `index` is a list or a similar iterable, you'll get a list of |
| items corresponding to those indices. This is similar to NumPy's |
| "fancy indexing". The result is not an OrderedSet because you may ask |
| for duplicate indices, and the number of elements returned should be |
| the number of elements asked for. |
| |
| Example: |
| >>> oset = OrderedSet([1, 2, 3]) |
| >>> oset[1] |
| 2 |
| """ |
| if isinstance(index, slice) and index == SLICE_ALL: |
| return self.copy() |
| elif is_iterable(index): |
| return [self.items[i] for i in index] |
| elif hasattr(index, "__index__") or isinstance(index, slice): |
| result = self.items[index] |
| if isinstance(result, list): |
| return self.__class__(result) |
| else: |
| return result |
| else: |
| raise TypeError("Don't know how to index an OrderedSet by %r" % index) |
| |
| def copy(self): |
| """ |
| Return a shallow copy of this object. |
| |
| Example: |
| >>> this = OrderedSet([1, 2, 3]) |
| >>> other = this.copy() |
| >>> this == other |
| True |
| >>> this is other |
| False |
| """ |
| return self.__class__(self) |
| |
| def __getstate__(self): |
| if len(self) == 0: |
| # The state can't be an empty list. |
| # We need to return a truthy value, or else __setstate__ won't be run. |
| # |
| # This could have been done more gracefully by always putting the state |
| # in a tuple, but this way is backwards- and forwards- compatible with |
| # previous versions of OrderedSet. |
| return (None,) |
| else: |
| return list(self) |
| |
| def __setstate__(self, state): |
| if state == (None,): |
| self.__init__([]) |
| else: |
| self.__init__(state) |
| |
| def __contains__(self, key): |
| """ |
| Test if the item is in this ordered set |
| |
| Example: |
| >>> 1 in OrderedSet([1, 3, 2]) |
| True |
| >>> 5 in OrderedSet([1, 3, 2]) |
| False |
| """ |
| return key in self.map |
| |
| def add(self, key): |
| """ |
| Add `key` as an item to this OrderedSet, then return its index. |
| |
| If `key` is already in the OrderedSet, return the index it already |
| had. |
| |
| Example: |
| >>> oset = OrderedSet() |
| >>> oset.append(3) |
| 0 |
| >>> print(oset) |
| OrderedSet([3]) |
| """ |
| if key not in self.map: |
| self.map[key] = len(self.items) |
| self.items.append(key) |
| return self.map[key] |
| |
| append = add |
| |
| def update(self, sequence): |
| """ |
| Update the set with the given iterable sequence, then return the index |
| of the last element inserted. |
| |
| Example: |
| >>> oset = OrderedSet([1, 2, 3]) |
| >>> oset.update([3, 1, 5, 1, 4]) |
| 4 |
| >>> print(oset) |
| OrderedSet([1, 2, 3, 5, 4]) |
| """ |
| item_index = None |
| try: |
| for item in sequence: |
| item_index = self.add(item) |
| except TypeError: |
| raise ValueError( |
| "Argument needs to be an iterable, got %s" % type(sequence) |
| ) |
| return item_index |
| |
| def index(self, key): |
| """ |
| Get the index of a given entry, raising an IndexError if it's not |
| present. |
| |
| `key` can be an iterable of entries that is not a string, in which case |
| this returns a list of indices. |
| |
| Example: |
| >>> oset = OrderedSet([1, 2, 3]) |
| >>> oset.index(2) |
| 1 |
| """ |
| if is_iterable(key): |
| return [self.index(subkey) for subkey in key] |
| return self.map[key] |
| |
| # Provide some compatibility with pd.Index |
| get_loc = index |
| get_indexer = index |
| |
| def pop(self): |
| """ |
| Remove and return the last element from the set. |
| |
| Raises KeyError if the set is empty. |
| |
| Example: |
| >>> oset = OrderedSet([1, 2, 3]) |
| >>> oset.pop() |
| 3 |
| """ |
| if not self.items: |
| raise KeyError("Set is empty") |
| |
| elem = self.items[-1] |
| del self.items[-1] |
| del self.map[elem] |
| return elem |
| |
| def discard(self, key): |
| """ |
| Remove an element. Do not raise an exception if absent. |
| |
| The MutableSet mixin uses this to implement the .remove() method, which |
| *does* raise an error when asked to remove a non-existent item. |
| |
| Example: |
| >>> oset = OrderedSet([1, 2, 3]) |
| >>> oset.discard(2) |
| >>> print(oset) |
| OrderedSet([1, 3]) |
| >>> oset.discard(2) |
| >>> print(oset) |
| OrderedSet([1, 3]) |
| """ |
| if key in self: |
| i = self.map[key] |
| del self.items[i] |
| del self.map[key] |
| for k, v in self.map.items(): |
| if v >= i: |
| self.map[k] = v - 1 |
| |
| def clear(self): |
| """ |
| Remove all items from this OrderedSet. |
| """ |
| del self.items[:] |
| self.map.clear() |
| |
| def __iter__(self): |
| """ |
| Example: |
| >>> list(iter(OrderedSet([1, 2, 3]))) |
| [1, 2, 3] |
| """ |
| return iter(self.items) |
| |
| def __reversed__(self): |
| """ |
| Example: |
| >>> list(reversed(OrderedSet([1, 2, 3]))) |
| [3, 2, 1] |
| """ |
| return reversed(self.items) |
| |
| def __repr__(self): |
| if not self: |
| return "%s()" % (self.__class__.__name__,) |
| return "%s(%r)" % (self.__class__.__name__, list(self)) |
| |
| def __eq__(self, other): |
| """ |
| Returns true if the containers have the same items. If `other` is a |
| Sequence, then order is checked, otherwise it is ignored. |
| |
| Example: |
| >>> oset = OrderedSet([1, 3, 2]) |
| >>> oset == [1, 3, 2] |
| True |
| >>> oset == [1, 2, 3] |
| False |
| >>> oset == [2, 3] |
| False |
| >>> oset == OrderedSet([3, 2, 1]) |
| False |
| """ |
| # In Python 2 deque is not a Sequence, so treat it as one for |
| # consistent behavior with Python 3. |
| if isinstance(other, (Sequence, deque)): |
| # Check that this OrderedSet contains the same elements, in the |
| # same order, as the other object. |
| return list(self) == list(other) |
| try: |
| other_as_set = set(other) |
| except TypeError: |
| # If `other` can't be converted into a set, it's not equal. |
| return False |
| else: |
| return set(self) == other_as_set |
| |
| def union(self, *sets): |
| """ |
| Combines all unique items. |
| Each items order is defined by its first appearance. |
| |
| Example: |
| >>> oset = OrderedSet.union(OrderedSet([3, 1, 4, 1, 5]), [1, 3], [2, 0]) |
| >>> print(oset) |
| OrderedSet([3, 1, 4, 5, 2, 0]) |
| >>> oset.union([8, 9]) |
| OrderedSet([3, 1, 4, 5, 2, 0, 8, 9]) |
| >>> oset | {10} |
| OrderedSet([3, 1, 4, 5, 2, 0, 10]) |
| """ |
| cls = self.__class__ if isinstance(self, OrderedSet) else OrderedSet |
| containers = map(list, it.chain([self], sets)) |
| items = it.chain.from_iterable(containers) |
| return cls(items) |
| |
| def __and__(self, other): |
| # the parent implementation of this is backwards |
| return self.intersection(other) |
| |
| def intersection(self, *sets): |
| """ |
| Returns elements in common between all sets. Order is defined only |
| by the first set. |
| |
| Example: |
| >>> oset = OrderedSet.intersection(OrderedSet([0, 1, 2, 3]), [1, 2, 3]) |
| >>> print(oset) |
| OrderedSet([1, 2, 3]) |
| >>> oset.intersection([2, 4, 5], [1, 2, 3, 4]) |
| OrderedSet([2]) |
| >>> oset.intersection() |
| OrderedSet([1, 2, 3]) |
| """ |
| cls = self.__class__ if isinstance(self, OrderedSet) else OrderedSet |
| if sets: |
| common = set.intersection(*map(set, sets)) |
| items = (item for item in self if item in common) |
| else: |
| items = self |
| return cls(items) |
| |
| def difference(self, *sets): |
| """ |
| Returns all elements that are in this set but not the others. |
| |
| Example: |
| >>> OrderedSet([1, 2, 3]).difference(OrderedSet([2])) |
| OrderedSet([1, 3]) |
| >>> OrderedSet([1, 2, 3]).difference(OrderedSet([2]), OrderedSet([3])) |
| OrderedSet([1]) |
| >>> OrderedSet([1, 2, 3]) - OrderedSet([2]) |
| OrderedSet([1, 3]) |
| >>> OrderedSet([1, 2, 3]).difference() |
| OrderedSet([1, 2, 3]) |
| """ |
| cls = self.__class__ |
| if sets: |
| other = set.union(*map(set, sets)) |
| items = (item for item in self if item not in other) |
| else: |
| items = self |
| return cls(items) |
| |
| def issubset(self, other): |
| """ |
| Report whether another set contains this set. |
| |
| Example: |
| >>> OrderedSet([1, 2, 3]).issubset({1, 2}) |
| False |
| >>> OrderedSet([1, 2, 3]).issubset({1, 2, 3, 4}) |
| True |
| >>> OrderedSet([1, 2, 3]).issubset({1, 4, 3, 5}) |
| False |
| """ |
| if len(self) > len(other): # Fast check for obvious cases |
| return False |
| return all(item in other for item in self) |
| |
| def issuperset(self, other): |
| """ |
| Report whether this set contains another set. |
| |
| Example: |
| >>> OrderedSet([1, 2]).issuperset([1, 2, 3]) |
| False |
| >>> OrderedSet([1, 2, 3, 4]).issuperset({1, 2, 3}) |
| True |
| >>> OrderedSet([1, 4, 3, 5]).issuperset({1, 2, 3}) |
| False |
| """ |
| if len(self) < len(other): # Fast check for obvious cases |
| return False |
| return all(item in self for item in other) |
| |
| def symmetric_difference(self, other): |
| """ |
| Return the symmetric difference of two OrderedSets as a new set. |
| That is, the new set will contain all elements that are in exactly |
| one of the sets. |
| |
| Their order will be preserved, with elements from `self` preceding |
| elements from `other`. |
| |
| Example: |
| >>> this = OrderedSet([1, 4, 3, 5, 7]) |
| >>> other = OrderedSet([9, 7, 1, 3, 2]) |
| >>> this.symmetric_difference(other) |
| OrderedSet([4, 5, 9, 2]) |
| """ |
| cls = self.__class__ if isinstance(self, OrderedSet) else OrderedSet |
| diff1 = cls(self).difference(other) |
| diff2 = cls(other).difference(self) |
| return diff1.union(diff2) |
| |
| def _update_items(self, items): |
| """ |
| Replace the 'items' list of this OrderedSet with a new one, updating |
| self.map accordingly. |
| """ |
| self.items = items |
| self.map = {item: idx for (idx, item) in enumerate(items)} |
| |
| def difference_update(self, *sets): |
| """ |
| Update this OrderedSet to remove items from one or more other sets. |
| |
| Example: |
| >>> this = OrderedSet([1, 2, 3]) |
| >>> this.difference_update(OrderedSet([2, 4])) |
| >>> print(this) |
| OrderedSet([1, 3]) |
| |
| >>> this = OrderedSet([1, 2, 3, 4, 5]) |
| >>> this.difference_update(OrderedSet([2, 4]), OrderedSet([1, 4, 6])) |
| >>> print(this) |
| OrderedSet([3, 5]) |
| """ |
| items_to_remove = set() |
| for other in sets: |
| items_to_remove |= set(other) |
| self._update_items([item for item in self.items if item not in items_to_remove]) |
| |
| def intersection_update(self, other): |
| """ |
| Update this OrderedSet to keep only items in another set, preserving |
| their order in this set. |
| |
| Example: |
| >>> this = OrderedSet([1, 4, 3, 5, 7]) |
| >>> other = OrderedSet([9, 7, 1, 3, 2]) |
| >>> this.intersection_update(other) |
| >>> print(this) |
| OrderedSet([1, 3, 7]) |
| """ |
| other = set(other) |
| self._update_items([item for item in self.items if item in other]) |
| |
| def symmetric_difference_update(self, other): |
| """ |
| Update this OrderedSet to remove items from another set, then |
| add items from the other set that were not present in this set. |
| |
| Example: |
| >>> this = OrderedSet([1, 4, 3, 5, 7]) |
| >>> other = OrderedSet([9, 7, 1, 3, 2]) |
| >>> this.symmetric_difference_update(other) |
| >>> print(this) |
| OrderedSet([4, 5, 9, 2]) |
| """ |
| items_to_add = [item for item in other if item not in self] |
| items_to_remove = set(other) |
| self._update_items( |
| [item for item in self.items if item not in items_to_remove] + items_to_add |
| ) |