blob: d234db139473a1c6c1d7ad85e44d50d4efac72c4 [file] [log] [blame]
/*
* Copyright (C) 2011 The Guava Authors
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.common.hash;
import static com.google.common.base.Preconditions.checkArgument;
import com.google.common.math.LongMath;
import com.google.common.primitives.Ints;
import com.google.common.primitives.Longs;
import java.math.RoundingMode;
import java.util.Arrays;
import java.util.concurrent.atomic.AtomicLongArray;
import org.checkerframework.checker.nullness.qual.Nullable;
/**
* Collections of strategies of generating the k * log(M) bits required for an element to be mapped
* to a BloomFilter of M bits and k hash functions. These strategies are part of the serialized form
* of the Bloom filters that use them, thus they must be preserved as is (no updates allowed, only
* introduction of new versions).
*
* <p>Important: the order of the constants cannot change, and they cannot be deleted - we depend on
* their ordinal for BloomFilter serialization.
*
* @author Dimitris Andreou
* @author Kurt Alfred Kluever
*/
enum BloomFilterStrategies implements BloomFilter.Strategy {
/**
* See "Less Hashing, Same Performance: Building a Better Bloom Filter" by Adam Kirsch and Michael
* Mitzenmacher. The paper argues that this trick doesn't significantly deteriorate the
* performance of a Bloom filter (yet only needs two 32bit hash functions).
*/
MURMUR128_MITZ_32() {
@Override
public <T> boolean put(
T object, Funnel<? super T> funnel, int numHashFunctions, LockFreeBitArray bits) {
long bitSize = bits.bitSize();
long hash64 = Hashing.murmur3_128().hashObject(object, funnel).asLong();
int hash1 = (int) hash64;
int hash2 = (int) (hash64 >>> 32);
boolean bitsChanged = false;
for (int i = 1; i <= numHashFunctions; i++) {
int combinedHash = hash1 + (i * hash2);
// Flip all the bits if it's negative (guaranteed positive number)
if (combinedHash < 0) {
combinedHash = ~combinedHash;
}
bitsChanged |= bits.set(combinedHash % bitSize);
}
return bitsChanged;
}
@Override
public <T> boolean mightContain(
T object, Funnel<? super T> funnel, int numHashFunctions, LockFreeBitArray bits) {
long bitSize = bits.bitSize();
long hash64 = Hashing.murmur3_128().hashObject(object, funnel).asLong();
int hash1 = (int) hash64;
int hash2 = (int) (hash64 >>> 32);
for (int i = 1; i <= numHashFunctions; i++) {
int combinedHash = hash1 + (i * hash2);
// Flip all the bits if it's negative (guaranteed positive number)
if (combinedHash < 0) {
combinedHash = ~combinedHash;
}
if (!bits.get(combinedHash % bitSize)) {
return false;
}
}
return true;
}
},
/**
* This strategy uses all 128 bits of {@link Hashing#murmur3_128} when hashing. It looks different
* than the implementation in MURMUR128_MITZ_32 because we're avoiding the multiplication in the
* loop and doing a (much simpler) += hash2. We're also changing the index to a positive number by
* AND'ing with Long.MAX_VALUE instead of flipping the bits.
*/
MURMUR128_MITZ_64() {
@Override
public <T> boolean put(
T object, Funnel<? super T> funnel, int numHashFunctions, LockFreeBitArray bits) {
long bitSize = bits.bitSize();
byte[] bytes = Hashing.murmur3_128().hashObject(object, funnel).getBytesInternal();
long hash1 = lowerEight(bytes);
long hash2 = upperEight(bytes);
boolean bitsChanged = false;
long combinedHash = hash1;
for (int i = 0; i < numHashFunctions; i++) {
// Make the combined hash positive and indexable
bitsChanged |= bits.set((combinedHash & Long.MAX_VALUE) % bitSize);
combinedHash += hash2;
}
return bitsChanged;
}
@Override
public <T> boolean mightContain(
T object, Funnel<? super T> funnel, int numHashFunctions, LockFreeBitArray bits) {
long bitSize = bits.bitSize();
byte[] bytes = Hashing.murmur3_128().hashObject(object, funnel).getBytesInternal();
long hash1 = lowerEight(bytes);
long hash2 = upperEight(bytes);
long combinedHash = hash1;
for (int i = 0; i < numHashFunctions; i++) {
// Make the combined hash positive and indexable
if (!bits.get((combinedHash & Long.MAX_VALUE) % bitSize)) {
return false;
}
combinedHash += hash2;
}
return true;
}
private /* static */ long lowerEight(byte[] bytes) {
return Longs.fromBytes(
bytes[7], bytes[6], bytes[5], bytes[4], bytes[3], bytes[2], bytes[1], bytes[0]);
}
private /* static */ long upperEight(byte[] bytes) {
return Longs.fromBytes(
bytes[15], bytes[14], bytes[13], bytes[12], bytes[11], bytes[10], bytes[9], bytes[8]);
}
};
/**
* Models a lock-free array of bits.
*
* <p>We use this instead of java.util.BitSet because we need access to the array of longs and we
* need compare-and-swap.
*/
static final class LockFreeBitArray {
private static final int LONG_ADDRESSABLE_BITS = 6;
final AtomicLongArray data;
private final LongAddable bitCount;
LockFreeBitArray(long bits) {
this(new long[Ints.checkedCast(LongMath.divide(bits, 64, RoundingMode.CEILING))]);
}
// Used by serialization
LockFreeBitArray(long[] data) {
checkArgument(data.length > 0, "data length is zero!");
this.data = new AtomicLongArray(data);
this.bitCount = LongAddables.create();
long bitCount = 0;
for (long value : data) {
bitCount += Long.bitCount(value);
}
this.bitCount.add(bitCount);
}
/** Returns true if the bit changed value. */
boolean set(long bitIndex) {
if (get(bitIndex)) {
return false;
}
int longIndex = (int) (bitIndex >>> LONG_ADDRESSABLE_BITS);
long mask = 1L << bitIndex; // only cares about low 6 bits of bitIndex
long oldValue;
long newValue;
do {
oldValue = data.get(longIndex);
newValue = oldValue | mask;
if (oldValue == newValue) {
return false;
}
} while (!data.compareAndSet(longIndex, oldValue, newValue));
// We turned the bit on, so increment bitCount.
bitCount.increment();
return true;
}
boolean get(long bitIndex) {
return (data.get((int) (bitIndex >>> LONG_ADDRESSABLE_BITS)) & (1L << bitIndex)) != 0;
}
/**
* Careful here: if threads are mutating the atomicLongArray while this method is executing, the
* final long[] will be a "rolling snapshot" of the state of the bit array. This is usually good
* enough, but should be kept in mind.
*/
public static long[] toPlainArray(AtomicLongArray atomicLongArray) {
long[] array = new long[atomicLongArray.length()];
for (int i = 0; i < array.length; ++i) {
array[i] = atomicLongArray.get(i);
}
return array;
}
/** Number of bits */
long bitSize() {
return (long) data.length() * Long.SIZE;
}
/**
* Number of set bits (1s).
*
* <p>Note that because of concurrent set calls and uses of atomics, this bitCount is a (very)
* close *estimate* of the actual number of bits set. It's not possible to do better than an
* estimate without locking. Note that the number, if not exactly accurate, is *always*
* underestimating, never overestimating.
*/
long bitCount() {
return bitCount.sum();
}
LockFreeBitArray copy() {
return new LockFreeBitArray(toPlainArray(data));
}
/**
* Combines the two BitArrays using bitwise OR.
*
* <p>NOTE: Because of the use of atomics, if the other LockFreeBitArray is being mutated while
* this operation is executing, not all of those new 1's may be set in the final state of this
* LockFreeBitArray. The ONLY guarantee provided is that all the bits that were set in the other
* LockFreeBitArray at the start of this method will be set in this LockFreeBitArray at the end
* of this method.
*/
void putAll(LockFreeBitArray other) {
checkArgument(
data.length() == other.data.length(),
"BitArrays must be of equal length (%s != %s)",
data.length(),
other.data.length());
for (int i = 0; i < data.length(); i++) {
long otherLong = other.data.get(i);
long ourLongOld;
long ourLongNew;
boolean changedAnyBits = true;
do {
ourLongOld = data.get(i);
ourLongNew = ourLongOld | otherLong;
if (ourLongOld == ourLongNew) {
changedAnyBits = false;
break;
}
} while (!data.compareAndSet(i, ourLongOld, ourLongNew));
if (changedAnyBits) {
int bitsAdded = Long.bitCount(ourLongNew) - Long.bitCount(ourLongOld);
bitCount.add(bitsAdded);
}
}
}
@Override
public boolean equals(@Nullable Object o) {
if (o instanceof LockFreeBitArray) {
LockFreeBitArray lockFreeBitArray = (LockFreeBitArray) o;
// TODO(lowasser): avoid allocation here
return Arrays.equals(toPlainArray(data), toPlainArray(lockFreeBitArray.data));
}
return false;
}
@Override
public int hashCode() {
// TODO(lowasser): avoid allocation here
return Arrays.hashCode(toPlainArray(data));
}
}
}