blob: 3f96815b156232c15eeaf2af2a045be5f8521645 [file] [log] [blame]
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.setfilters.cuckoofilter;
import java.nio.ByteBuffer;
import java.util.Optional;
import java.util.Random;
/**
* Implementation of the {@link CuckooFilterTable} that doesn't use the semi-sorting bucket
* compression scheme in the original paper by Fan et al
* (https://www.cs.cmu.edu/~dga/papers/cuckoo-conext2014.pdf) - see section 5.2 for what
* semi-sorting bucket compression scheme is.
*
* <p>Thus, if a bucket can hold up to bucketCapacity number of fingerprints and each fingerprint is
* of length fingerprintLength bits, it takes bucketCapacity * fingerprintLength bits to represent
* each bucket.
*/
final class UncompressedCuckooFilterTable implements CuckooFilterTable {
// Implementation type of the table, to be encoded in the serialization.
public static final int TABLE_TYPE = 0;
private final CuckooFilterConfig.Size size;
private final Random random;
private final CuckooFilterArray cuckooFilterArray;
/**
* Creates a new uncompressed cuckoo filter table of the given size.
*
* <p>Uses the given source of {@code random} to choose the replaced fingerprint in {@code
* insertWithReplacement} method.
*/
public UncompressedCuckooFilterTable(CuckooFilterConfig.Size size, Random random) {
this.size = size;
this.random = random;
// bucketCapacity <= 128 and fingerprintLength <= 64, so we can assume that it will always fit
// into a long.
cuckooFilterArray =
new CuckooFilterArray(
(long) size.bucketCount() * size.bucketCapacity(), size.fingerprintLength());
}
/** Creates {@link UncompressedCuckooFilterTable} from {@link SerializedCuckooFilterTable}. */
public UncompressedCuckooFilterTable(
CuckooFilterConfig.Size size, byte[] bitArray, Random random) {
this.size = size;
this.random = random;
cuckooFilterArray =
new CuckooFilterArray(
(long) size.bucketCount() * size.bucketCapacity(), size.fingerprintLength(), bitArray);
}
@Override
public Optional<Long> insertWithReplacement(int bucketIndex, long fingerprint) {
for (int slotIndex = 0; slotIndex < size.bucketCapacity(); slotIndex++) {
long arrayIndex = toArrayIndex(bucketIndex, slotIndex);
if (cuckooFilterArray.getAsLong(arrayIndex) == CuckooFilterTable.EMPTY_SLOT) {
cuckooFilterArray.set(arrayIndex, fingerprint);
return Optional.empty();
}
}
int replacedSlotIndex = random.nextInt(size.bucketCapacity());
long replacedArrayIndex = toArrayIndex(bucketIndex, replacedSlotIndex);
long replacedFingerprint = cuckooFilterArray.getAsLong(replacedArrayIndex);
cuckooFilterArray.set(replacedArrayIndex, fingerprint);
return Optional.of(replacedFingerprint);
}
@Override
public boolean contains(int bucketIndex, long fingerprint) {
for (int slotIndex = 0; slotIndex < size.bucketCapacity(); slotIndex++) {
long arrayIndex = toArrayIndex(bucketIndex, slotIndex);
if (cuckooFilterArray.getAsLong(arrayIndex) == fingerprint) {
return true;
}
}
return false;
}
@Override
public boolean delete(int bucketIndex, long fingerprint) {
for (int slotIndex = 0; slotIndex < size.bucketCapacity(); slotIndex++) {
long arrayIndex = toArrayIndex(bucketIndex, slotIndex);
if (cuckooFilterArray.getAsLong(arrayIndex) == fingerprint) {
cuckooFilterArray.set(arrayIndex, CuckooFilterTable.EMPTY_SLOT);
return true;
}
}
return false;
}
@Override
public boolean isFull(int bucketIndex) {
return !contains(bucketIndex, CuckooFilterTable.EMPTY_SLOT);
}
@Override
public CuckooFilterConfig.Size size() {
return size;
}
@Override
public SerializedCuckooFilterTable serialize() {
byte[] serializedArray = cuckooFilterArray.toByteArray();
// The first 16 bytes specifies the implementation type and the size of the table (defined by
// tuple (type, bucketCount,
// bucketCapacity, fingerprintLength)).
// Rest is the bit array.
ByteBuffer encoded = ByteBuffer.allocate(16 + serializedArray.length);
return SerializedCuckooFilterTable.createFromByteArray(
encoded
.putInt(TABLE_TYPE)
.putInt(size.bucketCount())
.putInt(size.bucketCapacity())
.putInt(size.fingerprintLength())
.put(serializedArray)
.array());
}
private long toArrayIndex(int bucketIndex, int slotIndex) {
return (long) bucketIndex * size.bucketCapacity() + slotIndex;
}
}