| /* |
| * Copyright (C) 2011 The Guava Authors |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
| * in compliance with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software distributed under the License |
| * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
| * or implied. See the License for the specific language governing permissions and limitations under |
| * the License. |
| */ |
| |
| package com.google.common.hash; |
| |
| import static com.google.common.base.Preconditions.checkArgument; |
| |
| import com.google.common.math.LongMath; |
| import com.google.common.primitives.Ints; |
| import com.google.common.primitives.Longs; |
| import java.math.RoundingMode; |
| import java.util.Arrays; |
| import java.util.concurrent.atomic.AtomicLongArray; |
| import org.checkerframework.checker.nullness.compatqual.NullableDecl; |
| |
| /** |
| * Collections of strategies of generating the k * log(M) bits required for an element to be mapped |
| * to a BloomFilter of M bits and k hash functions. These strategies are part of the serialized form |
| * of the Bloom filters that use them, thus they must be preserved as is (no updates allowed, only |
| * introduction of new versions). |
| * |
| * <p>Important: the order of the constants cannot change, and they cannot be deleted - we depend on |
| * their ordinal for BloomFilter serialization. |
| * |
| * @author Dimitris Andreou |
| * @author Kurt Alfred Kluever |
| */ |
| enum BloomFilterStrategies implements BloomFilter.Strategy { |
| /** |
| * See "Less Hashing, Same Performance: Building a Better Bloom Filter" by Adam Kirsch and Michael |
| * Mitzenmacher. The paper argues that this trick doesn't significantly deteriorate the |
| * performance of a Bloom filter (yet only needs two 32bit hash functions). |
| */ |
| MURMUR128_MITZ_32() { |
| @Override |
| public <T> boolean put( |
| T object, Funnel<? super T> funnel, int numHashFunctions, LockFreeBitArray bits) { |
| long bitSize = bits.bitSize(); |
| long hash64 = Hashing.murmur3_128().hashObject(object, funnel).asLong(); |
| int hash1 = (int) hash64; |
| int hash2 = (int) (hash64 >>> 32); |
| |
| boolean bitsChanged = false; |
| for (int i = 1; i <= numHashFunctions; i++) { |
| int combinedHash = hash1 + (i * hash2); |
| // Flip all the bits if it's negative (guaranteed positive number) |
| if (combinedHash < 0) { |
| combinedHash = ~combinedHash; |
| } |
| bitsChanged |= bits.set(combinedHash % bitSize); |
| } |
| return bitsChanged; |
| } |
| |
| @Override |
| public <T> boolean mightContain( |
| T object, Funnel<? super T> funnel, int numHashFunctions, LockFreeBitArray bits) { |
| long bitSize = bits.bitSize(); |
| long hash64 = Hashing.murmur3_128().hashObject(object, funnel).asLong(); |
| int hash1 = (int) hash64; |
| int hash2 = (int) (hash64 >>> 32); |
| |
| for (int i = 1; i <= numHashFunctions; i++) { |
| int combinedHash = hash1 + (i * hash2); |
| // Flip all the bits if it's negative (guaranteed positive number) |
| if (combinedHash < 0) { |
| combinedHash = ~combinedHash; |
| } |
| if (!bits.get(combinedHash % bitSize)) { |
| return false; |
| } |
| } |
| return true; |
| } |
| }, |
| /** |
| * This strategy uses all 128 bits of {@link Hashing#murmur3_128} when hashing. It looks different |
| * than the implementation in MURMUR128_MITZ_32 because we're avoiding the multiplication in the |
| * loop and doing a (much simpler) += hash2. We're also changing the index to a positive number by |
| * AND'ing with Long.MAX_VALUE instead of flipping the bits. |
| */ |
| MURMUR128_MITZ_64() { |
| @Override |
| public <T> boolean put( |
| T object, Funnel<? super T> funnel, int numHashFunctions, LockFreeBitArray bits) { |
| long bitSize = bits.bitSize(); |
| byte[] bytes = Hashing.murmur3_128().hashObject(object, funnel).getBytesInternal(); |
| long hash1 = lowerEight(bytes); |
| long hash2 = upperEight(bytes); |
| |
| boolean bitsChanged = false; |
| long combinedHash = hash1; |
| for (int i = 0; i < numHashFunctions; i++) { |
| // Make the combined hash positive and indexable |
| bitsChanged |= bits.set((combinedHash & Long.MAX_VALUE) % bitSize); |
| combinedHash += hash2; |
| } |
| return bitsChanged; |
| } |
| |
| @Override |
| public <T> boolean mightContain( |
| T object, Funnel<? super T> funnel, int numHashFunctions, LockFreeBitArray bits) { |
| long bitSize = bits.bitSize(); |
| byte[] bytes = Hashing.murmur3_128().hashObject(object, funnel).getBytesInternal(); |
| long hash1 = lowerEight(bytes); |
| long hash2 = upperEight(bytes); |
| |
| long combinedHash = hash1; |
| for (int i = 0; i < numHashFunctions; i++) { |
| // Make the combined hash positive and indexable |
| if (!bits.get((combinedHash & Long.MAX_VALUE) % bitSize)) { |
| return false; |
| } |
| combinedHash += hash2; |
| } |
| return true; |
| } |
| |
| private /* static */ long lowerEight(byte[] bytes) { |
| return Longs.fromBytes( |
| bytes[7], bytes[6], bytes[5], bytes[4], bytes[3], bytes[2], bytes[1], bytes[0]); |
| } |
| |
| private /* static */ long upperEight(byte[] bytes) { |
| return Longs.fromBytes( |
| bytes[15], bytes[14], bytes[13], bytes[12], bytes[11], bytes[10], bytes[9], bytes[8]); |
| } |
| }; |
| |
| /** |
| * Models a lock-free array of bits. |
| * |
| * <p>We use this instead of java.util.BitSet because we need access to the array of longs and we |
| * need compare-and-swap. |
| */ |
| static final class LockFreeBitArray { |
| private static final int LONG_ADDRESSABLE_BITS = 6; |
| final AtomicLongArray data; |
| private final LongAddable bitCount; |
| |
| LockFreeBitArray(long bits) { |
| this(new long[Ints.checkedCast(LongMath.divide(bits, 64, RoundingMode.CEILING))]); |
| } |
| |
| // Used by serialization |
| LockFreeBitArray(long[] data) { |
| checkArgument(data.length > 0, "data length is zero!"); |
| this.data = new AtomicLongArray(data); |
| this.bitCount = LongAddables.create(); |
| long bitCount = 0; |
| for (long value : data) { |
| bitCount += Long.bitCount(value); |
| } |
| this.bitCount.add(bitCount); |
| } |
| |
| /** Returns true if the bit changed value. */ |
| boolean set(long bitIndex) { |
| if (get(bitIndex)) { |
| return false; |
| } |
| |
| int longIndex = (int) (bitIndex >>> LONG_ADDRESSABLE_BITS); |
| long mask = 1L << bitIndex; // only cares about low 6 bits of bitIndex |
| |
| long oldValue; |
| long newValue; |
| do { |
| oldValue = data.get(longIndex); |
| newValue = oldValue | mask; |
| if (oldValue == newValue) { |
| return false; |
| } |
| } while (!data.compareAndSet(longIndex, oldValue, newValue)); |
| |
| // We turned the bit on, so increment bitCount. |
| bitCount.increment(); |
| return true; |
| } |
| |
| boolean get(long bitIndex) { |
| return (data.get((int) (bitIndex >>> LONG_ADDRESSABLE_BITS)) & (1L << bitIndex)) != 0; |
| } |
| |
| /** |
| * Careful here: if threads are mutating the atomicLongArray while this method is executing, the |
| * final long[] will be a "rolling snapshot" of the state of the bit array. This is usually good |
| * enough, but should be kept in mind. |
| */ |
| public static long[] toPlainArray(AtomicLongArray atomicLongArray) { |
| long[] array = new long[atomicLongArray.length()]; |
| for (int i = 0; i < array.length; ++i) { |
| array[i] = atomicLongArray.get(i); |
| } |
| return array; |
| } |
| |
| /** Number of bits */ |
| long bitSize() { |
| return (long) data.length() * Long.SIZE; |
| } |
| |
| /** |
| * Number of set bits (1s). |
| * |
| * <p>Note that because of concurrent set calls and uses of atomics, this bitCount is a (very) |
| * close *estimate* of the actual number of bits set. It's not possible to do better than an |
| * estimate without locking. Note that the number, if not exactly accurate, is *always* |
| * underestimating, never overestimating. |
| */ |
| long bitCount() { |
| return bitCount.sum(); |
| } |
| |
| LockFreeBitArray copy() { |
| return new LockFreeBitArray(toPlainArray(data)); |
| } |
| |
| /** |
| * Combines the two BitArrays using bitwise OR. |
| * |
| * <p>NOTE: Because of the use of atomics, if the other LockFreeBitArray is being mutated while |
| * this operation is executing, not all of those new 1's may be set in the final state of this |
| * LockFreeBitArray. The ONLY guarantee provided is that all the bits that were set in the other |
| * LockFreeBitArray at the start of this method will be set in this LockFreeBitArray at the end |
| * of this method. |
| */ |
| void putAll(LockFreeBitArray other) { |
| checkArgument( |
| data.length() == other.data.length(), |
| "BitArrays must be of equal length (%s != %s)", |
| data.length(), |
| other.data.length()); |
| for (int i = 0; i < data.length(); i++) { |
| long otherLong = other.data.get(i); |
| |
| long ourLongOld; |
| long ourLongNew; |
| boolean changedAnyBits = true; |
| do { |
| ourLongOld = data.get(i); |
| ourLongNew = ourLongOld | otherLong; |
| if (ourLongOld == ourLongNew) { |
| changedAnyBits = false; |
| break; |
| } |
| } while (!data.compareAndSet(i, ourLongOld, ourLongNew)); |
| |
| if (changedAnyBits) { |
| int bitsAdded = Long.bitCount(ourLongNew) - Long.bitCount(ourLongOld); |
| bitCount.add(bitsAdded); |
| } |
| } |
| } |
| |
| @Override |
| public boolean equals(@NullableDecl Object o) { |
| if (o instanceof LockFreeBitArray) { |
| LockFreeBitArray lockFreeBitArray = (LockFreeBitArray) o; |
| // TODO(lowasser): avoid allocation here |
| return Arrays.equals(toPlainArray(data), toPlainArray(lockFreeBitArray.data)); |
| } |
| return false; |
| } |
| |
| @Override |
| public int hashCode() { |
| // TODO(lowasser): avoid allocation here |
| return Arrays.hashCode(toPlainArray(data)); |
| } |
| } |
| } |