| /* |
| * Copyright (C) 2011 The Guava Authors |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
| * in compliance with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software distributed under the License |
| * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
| * or implied. See the License for the specific language governing permissions and limitations under |
| * the License. |
| */ |
| |
| /* |
| * MurmurHash3 was written by Austin Appleby, and is placed in the public |
| * domain. The author hereby disclaims copyright to this source code. |
| */ |
| |
| /* |
| * Source: |
| * http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp |
| * (Modified to adapt to Guava coding conventions and to use the HashFunction interface) |
| */ |
| |
| package com.google.common.hash; |
| |
| import static com.google.common.base.Preconditions.checkPositionIndexes; |
| import static com.google.common.base.Preconditions.checkState; |
| import static com.google.common.primitives.UnsignedBytes.toInt; |
| |
| import com.google.common.base.Charsets; |
| import com.google.common.primitives.Chars; |
| import com.google.common.primitives.Ints; |
| import com.google.common.primitives.Longs; |
| import com.google.errorprone.annotations.CanIgnoreReturnValue; |
| import com.google.errorprone.annotations.Immutable; |
| import java.io.Serializable; |
| import java.nio.ByteBuffer; |
| import java.nio.ByteOrder; |
| import java.nio.charset.Charset; |
| import org.checkerframework.checker.nullness.compatqual.NullableDecl; |
| |
| /** |
| * See MurmurHash3_x86_32 in <a |
| * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">the C++ |
| * implementation</a>. |
| * |
| * @author Austin Appleby |
| * @author Dimitris Andreou |
| * @author Kurt Alfred Kluever |
| */ |
| @Immutable |
| final class Murmur3_32HashFunction extends AbstractHashFunction implements Serializable { |
| static final HashFunction MURMUR3_32 = new Murmur3_32HashFunction(0); |
| |
| static final HashFunction GOOD_FAST_HASH_32 = |
| new Murmur3_32HashFunction(Hashing.GOOD_FAST_HASH_SEED); |
| |
| private static final int CHUNK_SIZE = 4; |
| |
| private static final int C1 = 0xcc9e2d51; |
| private static final int C2 = 0x1b873593; |
| |
| private final int seed; |
| |
| Murmur3_32HashFunction(int seed) { |
| this.seed = seed; |
| } |
| |
| @Override |
| public int bits() { |
| return 32; |
| } |
| |
| @Override |
| public Hasher newHasher() { |
| return new Murmur3_32Hasher(seed); |
| } |
| |
| @Override |
| public String toString() { |
| return "Hashing.murmur3_32(" + seed + ")"; |
| } |
| |
| @Override |
| public boolean equals(@NullableDecl Object object) { |
| if (object instanceof Murmur3_32HashFunction) { |
| Murmur3_32HashFunction other = (Murmur3_32HashFunction) object; |
| return seed == other.seed; |
| } |
| return false; |
| } |
| |
| @Override |
| public int hashCode() { |
| return getClass().hashCode() ^ seed; |
| } |
| |
| @Override |
| public HashCode hashInt(int input) { |
| int k1 = mixK1(input); |
| int h1 = mixH1(seed, k1); |
| |
| return fmix(h1, Ints.BYTES); |
| } |
| |
| @Override |
| public HashCode hashLong(long input) { |
| int low = (int) input; |
| int high = (int) (input >>> 32); |
| |
| int k1 = mixK1(low); |
| int h1 = mixH1(seed, k1); |
| |
| k1 = mixK1(high); |
| h1 = mixH1(h1, k1); |
| |
| return fmix(h1, Longs.BYTES); |
| } |
| |
| @Override |
| public HashCode hashUnencodedChars(CharSequence input) { |
| int h1 = seed; |
| |
| // step through the CharSequence 2 chars at a time |
| for (int i = 1; i < input.length(); i += 2) { |
| int k1 = input.charAt(i - 1) | (input.charAt(i) << 16); |
| k1 = mixK1(k1); |
| h1 = mixH1(h1, k1); |
| } |
| |
| // deal with any remaining characters |
| if ((input.length() & 1) == 1) { |
| int k1 = input.charAt(input.length() - 1); |
| k1 = mixK1(k1); |
| h1 ^= k1; |
| } |
| |
| return fmix(h1, Chars.BYTES * input.length()); |
| } |
| |
| @SuppressWarnings("deprecation") // need to use Charsets for Android tests to pass |
| @Override |
| public HashCode hashString(CharSequence input, Charset charset) { |
| if (Charsets.UTF_8.equals(charset)) { |
| int utf16Length = input.length(); |
| int h1 = seed; |
| int i = 0; |
| int len = 0; |
| |
| // This loop optimizes for pure ASCII. |
| while (i + 4 <= utf16Length) { |
| char c0 = input.charAt(i); |
| char c1 = input.charAt(i + 1); |
| char c2 = input.charAt(i + 2); |
| char c3 = input.charAt(i + 3); |
| if (c0 < 0x80 && c1 < 0x80 && c2 < 0x80 && c3 < 0x80) { |
| int k1 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24); |
| k1 = mixK1(k1); |
| h1 = mixH1(h1, k1); |
| i += 4; |
| len += 4; |
| } else { |
| break; |
| } |
| } |
| |
| long buffer = 0; |
| int shift = 0; |
| for (; i < utf16Length; i++) { |
| char c = input.charAt(i); |
| if (c < 0x80) { |
| buffer |= (long) c << shift; |
| shift += 8; |
| len++; |
| } else if (c < 0x800) { |
| buffer |= charToTwoUtf8Bytes(c) << shift; |
| shift += 16; |
| len += 2; |
| } else if (c < Character.MIN_SURROGATE || c > Character.MAX_SURROGATE) { |
| buffer |= charToThreeUtf8Bytes(c) << shift; |
| shift += 24; |
| len += 3; |
| } else { |
| int codePoint = Character.codePointAt(input, i); |
| if (codePoint == c) { |
| // not a valid code point; let the JDK handle invalid Unicode |
| return hashBytes(input.toString().getBytes(charset)); |
| } |
| i++; |
| buffer |= codePointToFourUtf8Bytes(codePoint) << shift; |
| len += 4; |
| } |
| |
| if (shift >= 32) { |
| int k1 = mixK1((int) buffer); |
| h1 = mixH1(h1, k1); |
| buffer = buffer >>> 32; |
| shift -= 32; |
| } |
| } |
| |
| int k1 = mixK1((int) buffer); |
| h1 ^= k1; |
| return fmix(h1, len); |
| } else { |
| return hashBytes(input.toString().getBytes(charset)); |
| } |
| } |
| |
| @Override |
| public HashCode hashBytes(byte[] input, int off, int len) { |
| checkPositionIndexes(off, off + len, input.length); |
| int h1 = seed; |
| int i; |
| for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE) { |
| int k1 = mixK1(getIntLittleEndian(input, off + i)); |
| h1 = mixH1(h1, k1); |
| } |
| |
| int k1 = 0; |
| for (int shift = 0; i < len; i++, shift += 8) { |
| k1 ^= toInt(input[off + i]) << shift; |
| } |
| h1 ^= mixK1(k1); |
| return fmix(h1, len); |
| } |
| |
| private static int getIntLittleEndian(byte[] input, int offset) { |
| return Ints.fromBytes(input[offset + 3], input[offset + 2], input[offset + 1], input[offset]); |
| } |
| |
| private static int mixK1(int k1) { |
| k1 *= C1; |
| k1 = Integer.rotateLeft(k1, 15); |
| k1 *= C2; |
| return k1; |
| } |
| |
| private static int mixH1(int h1, int k1) { |
| h1 ^= k1; |
| h1 = Integer.rotateLeft(h1, 13); |
| h1 = h1 * 5 + 0xe6546b64; |
| return h1; |
| } |
| |
| // Finalization mix - force all bits of a hash block to avalanche |
| private static HashCode fmix(int h1, int length) { |
| h1 ^= length; |
| h1 ^= h1 >>> 16; |
| h1 *= 0x85ebca6b; |
| h1 ^= h1 >>> 13; |
| h1 *= 0xc2b2ae35; |
| h1 ^= h1 >>> 16; |
| return HashCode.fromInt(h1); |
| } |
| |
| @CanIgnoreReturnValue |
| private static final class Murmur3_32Hasher extends AbstractHasher { |
| private int h1; |
| private long buffer; |
| private int shift; |
| private int length; |
| private boolean isDone; |
| |
| Murmur3_32Hasher(int seed) { |
| this.h1 = seed; |
| this.length = 0; |
| isDone = false; |
| } |
| |
| private void update(int nBytes, long update) { |
| // 1 <= nBytes <= 4 |
| buffer |= (update & 0xFFFFFFFFL) << shift; |
| shift += nBytes * 8; |
| length += nBytes; |
| |
| if (shift >= 32) { |
| h1 = mixH1(h1, mixK1((int) buffer)); |
| buffer >>>= 32; |
| shift -= 32; |
| } |
| } |
| |
| @Override |
| public Hasher putByte(byte b) { |
| update(1, b & 0xFF); |
| return this; |
| } |
| |
| @Override |
| public Hasher putBytes(byte[] bytes, int off, int len) { |
| checkPositionIndexes(off, off + len, bytes.length); |
| int i; |
| for (i = 0; i + 4 <= len; i += 4) { |
| update(4, getIntLittleEndian(bytes, off + i)); |
| } |
| for (; i < len; i++) { |
| putByte(bytes[off + i]); |
| } |
| return this; |
| } |
| |
| @Override |
| public Hasher putBytes(ByteBuffer buffer) { |
| ByteOrder bo = buffer.order(); |
| buffer.order(ByteOrder.LITTLE_ENDIAN); |
| while (buffer.remaining() >= 4) { |
| putInt(buffer.getInt()); |
| } |
| while (buffer.hasRemaining()) { |
| putByte(buffer.get()); |
| } |
| buffer.order(bo); |
| return this; |
| } |
| |
| @Override |
| public Hasher putInt(int i) { |
| update(4, i); |
| return this; |
| } |
| |
| @Override |
| public Hasher putLong(long l) { |
| update(4, (int) l); |
| update(4, l >>> 32); |
| return this; |
| } |
| |
| @Override |
| public Hasher putChar(char c) { |
| update(2, c); |
| return this; |
| } |
| |
| @SuppressWarnings("deprecation") // need to use Charsets for Android tests to pass |
| @Override |
| public Hasher putString(CharSequence input, Charset charset) { |
| if (Charsets.UTF_8.equals(charset)) { |
| int utf16Length = input.length(); |
| int i = 0; |
| |
| // This loop optimizes for pure ASCII. |
| while (i + 4 <= utf16Length) { |
| char c0 = input.charAt(i); |
| char c1 = input.charAt(i + 1); |
| char c2 = input.charAt(i + 2); |
| char c3 = input.charAt(i + 3); |
| if (c0 < 0x80 && c1 < 0x80 && c2 < 0x80 && c3 < 0x80) { |
| update(4, c0 | (c1 << 8) | (c2 << 16) | (c3 << 24)); |
| i += 4; |
| } else { |
| break; |
| } |
| } |
| |
| for (; i < utf16Length; i++) { |
| char c = input.charAt(i); |
| if (c < 0x80) { |
| update(1, c); |
| } else if (c < 0x800) { |
| update(2, charToTwoUtf8Bytes(c)); |
| } else if (c < Character.MIN_SURROGATE || c > Character.MAX_SURROGATE) { |
| update(3, charToThreeUtf8Bytes(c)); |
| } else { |
| int codePoint = Character.codePointAt(input, i); |
| if (codePoint == c) { |
| // fall back to JDK getBytes instead of trying to handle invalid surrogates ourselves |
| putBytes(input.subSequence(i, utf16Length).toString().getBytes(charset)); |
| return this; |
| } |
| i++; |
| update(4, codePointToFourUtf8Bytes(codePoint)); |
| } |
| } |
| return this; |
| } else { |
| return super.putString(input, charset); |
| } |
| } |
| |
| @Override |
| public HashCode hash() { |
| checkState(!isDone); |
| isDone = true; |
| h1 ^= mixK1((int) buffer); |
| return fmix(h1, length); |
| } |
| } |
| |
| private static long codePointToFourUtf8Bytes(int codePoint) { |
| return (((0xFL << 4) | (codePoint >>> 18)) & 0xFF) |
| | ((0x80L | (0x3F & (codePoint >>> 12))) << 8) |
| | ((0x80L | (0x3F & (codePoint >>> 6))) << 16) |
| | ((0x80L | (0x3F & codePoint)) << 24); |
| } |
| |
| private static long charToThreeUtf8Bytes(char c) { |
| return (((0xF << 5) | (c >>> 12)) & 0xFF) |
| | ((0x80 | (0x3F & (c >>> 6))) << 8) |
| | ((0x80 | (0x3F & c)) << 16); |
| } |
| |
| private static long charToTwoUtf8Bytes(char c) { |
| return (((0xF << 6) | (c >>> 6)) & 0xFF) | ((0x80 | (0x3F & c)) << 8); |
| } |
| |
| private static final long serialVersionUID = 0L; |
| } |