blob: 30eab5fadf8eac156c76bc82d5f77d37a4c4670c [file] [log] [blame]
/*
* Copyright (C) 2015 The Guava Authors
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.common.hash;
import static com.google.common.base.Preconditions.checkPositionIndexes;
import static com.google.common.hash.LittleEndianByteArray.load32;
import static com.google.common.hash.LittleEndianByteArray.load64;
import static java.lang.Long.rotateRight;
import com.google.common.annotations.VisibleForTesting;
/**
* Implementation of FarmHash Fingerprint64, an open-source fingerprinting algorithm for strings.
*
* <p>Its speed is comparable to CityHash64, and its quality of hashing is at least as good.
*
* <p>Note to maintainers: This implementation relies on signed arithmetic being bit-wise equivalent
* to unsigned arithmetic in all cases except:
*
* <ul>
* <li>comparisons (signed values can be negative)
* <li>division (avoided here)
* <li>shifting (right shift must be unsigned)
* </ul>
*
* @author Kyle Maddison
* @author Geoff Pike
*/
final class FarmHashFingerprint64 extends AbstractNonStreamingHashFunction {
static final HashFunction FARMHASH_FINGERPRINT_64 = new FarmHashFingerprint64();
// Some primes between 2^63 and 2^64 for various uses.
private static final long K0 = 0xc3a5c85c97cb3127L;
private static final long K1 = 0xb492b66fbe98f273L;
private static final long K2 = 0x9ae16a3b2f90404fL;
@Override
public HashCode hashBytes(byte[] input, int off, int len) {
checkPositionIndexes(off, off + len, input.length);
return HashCode.fromLong(fingerprint(input, off, len));
}
@Override
public int bits() {
return 64;
}
@Override
public String toString() {
return "Hashing.farmHashFingerprint64()";
}
// End of public functions.
@VisibleForTesting
static long fingerprint(byte[] bytes, int offset, int length) {
if (length <= 32) {
if (length <= 16) {
return hashLength0to16(bytes, offset, length);
} else {
return hashLength17to32(bytes, offset, length);
}
} else if (length <= 64) {
return hashLength33To64(bytes, offset, length);
} else {
return hashLength65Plus(bytes, offset, length);
}
}
private static long shiftMix(long val) {
return val ^ (val >>> 47);
}
private static long hashLength16(long u, long v, long mul) {
long a = (u ^ v) * mul;
a ^= (a >>> 47);
long b = (v ^ a) * mul;
b ^= (b >>> 47);
b *= mul;
return b;
}
/**
* Computes intermediate hash of 32 bytes of byte array from the given offset. Results are
* returned in the output array because when we last measured, this was 12% faster than allocating
* new arrays every time.
*/
private static void weakHashLength32WithSeeds(
byte[] bytes, int offset, long seedA, long seedB, long[] output) {
long part1 = load64(bytes, offset);
long part2 = load64(bytes, offset + 8);
long part3 = load64(bytes, offset + 16);
long part4 = load64(bytes, offset + 24);
seedA += part1;
seedB = rotateRight(seedB + seedA + part4, 21);
long c = seedA;
seedA += part2;
seedA += part3;
seedB += rotateRight(seedA, 44);
output[0] = seedA + part4;
output[1] = seedB + c;
}
private static long hashLength0to16(byte[] bytes, int offset, int length) {
if (length >= 8) {
long mul = K2 + length * 2;
long a = load64(bytes, offset) + K2;
long b = load64(bytes, offset + length - 8);
long c = rotateRight(b, 37) * mul + a;
long d = (rotateRight(a, 25) + b) * mul;
return hashLength16(c, d, mul);
}
if (length >= 4) {
long mul = K2 + length * 2;
long a = load32(bytes, offset) & 0xFFFFFFFFL;
return hashLength16(length + (a << 3), load32(bytes, offset + length - 4) & 0xFFFFFFFFL, mul);
}
if (length > 0) {
byte a = bytes[offset];
byte b = bytes[offset + (length >> 1)];
byte c = bytes[offset + (length - 1)];
int y = (a & 0xFF) + ((b & 0xFF) << 8);
int z = length + ((c & 0xFF) << 2);
return shiftMix(y * K2 ^ z * K0) * K2;
}
return K2;
}
private static long hashLength17to32(byte[] bytes, int offset, int length) {
long mul = K2 + length * 2;
long a = load64(bytes, offset) * K1;
long b = load64(bytes, offset + 8);
long c = load64(bytes, offset + length - 8) * mul;
long d = load64(bytes, offset + length - 16) * K2;
return hashLength16(
rotateRight(a + b, 43) + rotateRight(c, 30) + d, a + rotateRight(b + K2, 18) + c, mul);
}
private static long hashLength33To64(byte[] bytes, int offset, int length) {
long mul = K2 + length * 2;
long a = load64(bytes, offset) * K2;
long b = load64(bytes, offset + 8);
long c = load64(bytes, offset + length - 8) * mul;
long d = load64(bytes, offset + length - 16) * K2;
long y = rotateRight(a + b, 43) + rotateRight(c, 30) + d;
long z = hashLength16(y, a + rotateRight(b + K2, 18) + c, mul);
long e = load64(bytes, offset + 16) * mul;
long f = load64(bytes, offset + 24);
long g = (y + load64(bytes, offset + length - 32)) * mul;
long h = (z + load64(bytes, offset + length - 24)) * mul;
return hashLength16(
rotateRight(e + f, 43) + rotateRight(g, 30) + h, e + rotateRight(f + a, 18) + g, mul);
}
/*
* Compute an 8-byte hash of a byte array of length greater than 64 bytes.
*/
private static long hashLength65Plus(byte[] bytes, int offset, int length) {
final int seed = 81;
// For strings over 64 bytes we loop. Internal state consists of 56 bytes: v, w, x, y, and z.
long x = seed;
@SuppressWarnings("ConstantOverflow")
long y = seed * K1 + 113;
long z = shiftMix(y * K2 + 113) * K2;
long[] v = new long[2];
long[] w = new long[2];
x = x * K2 + load64(bytes, offset);
// Set end so that after the loop we have 1 to 64 bytes left to process.
int end = offset + ((length - 1) / 64) * 64;
int last64offset = end + ((length - 1) & 63) - 63;
do {
x = rotateRight(x + y + v[0] + load64(bytes, offset + 8), 37) * K1;
y = rotateRight(y + v[1] + load64(bytes, offset + 48), 42) * K1;
x ^= w[1];
y += v[0] + load64(bytes, offset + 40);
z = rotateRight(z + w[0], 33) * K1;
weakHashLength32WithSeeds(bytes, offset, v[1] * K1, x + w[0], v);
weakHashLength32WithSeeds(bytes, offset + 32, z + w[1], y + load64(bytes, offset + 16), w);
long tmp = x;
x = z;
z = tmp;
offset += 64;
} while (offset != end);
long mul = K1 + ((z & 0xFF) << 1);
// Operate on the last 64 bytes of input.
offset = last64offset;
w[0] += ((length - 1) & 63);
v[0] += w[0];
w[0] += v[0];
x = rotateRight(x + y + v[0] + load64(bytes, offset + 8), 37) * mul;
y = rotateRight(y + v[1] + load64(bytes, offset + 48), 42) * mul;
x ^= w[1] * 9;
y += v[0] * 9 + load64(bytes, offset + 40);
z = rotateRight(z + w[0], 33) * mul;
weakHashLength32WithSeeds(bytes, offset, v[1] * mul, x + w[0], v);
weakHashLength32WithSeeds(bytes, offset + 32, z + w[1], y + load64(bytes, offset + 16), w);
return hashLength16(
hashLength16(v[0], w[0], mul) + shiftMix(y) * K0 + x,
hashLength16(v[1], w[1], mul) + z,
mul);
}
}