| /* |
| * Copyright (C) 2015 The Guava Authors |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package com.google.common.hash; |
| |
| import static com.google.common.base.Charsets.ISO_8859_1; |
| import static com.google.common.base.Charsets.UTF_8; |
| import static com.google.common.truth.Truth.assertThat; |
| |
| import com.google.common.base.Strings; |
| import java.util.Arrays; |
| import junit.framework.TestCase; |
| |
| /** |
| * Unit test for FarmHashFingerprint64. |
| * |
| * @author Kyle Maddison |
| * @author Geoff Pike |
| */ |
| public class FarmHashFingerprint64Test extends TestCase { |
| |
| private static final HashFunction HASH_FN = Hashing.farmHashFingerprint64(); |
| |
| // If this test fails, all bets are off |
| public void testReallySimpleFingerprints() { |
| assertEquals(8581389452482819506L, fingerprint("test".getBytes(UTF_8))); |
| // 32 characters long |
| assertEquals(-4196240717365766262L, fingerprint(Strings.repeat("test", 8).getBytes(UTF_8))); |
| // 256 characters long |
| assertEquals(3500507768004279527L, fingerprint(Strings.repeat("test", 64).getBytes(UTF_8))); |
| } |
| |
| public void testStringsConsistency() { |
| for (String s : Arrays.asList("", "some", "test", "strings", "to", "try")) { |
| assertEquals(HASH_FN.newHasher().putUnencodedChars(s).hash(), HASH_FN.hashUnencodedChars(s)); |
| } |
| } |
| |
| public void testUtf8() { |
| char[] charsA = new char[128]; |
| char[] charsB = new char[128]; |
| |
| for (int i = 0; i < charsA.length; i++) { |
| if (i < 100) { |
| charsA[i] = 'a'; |
| charsB[i] = 'a'; |
| } else { |
| // Both two-byte characters, but must be different |
| charsA[i] = (char) (0x0180 + i); |
| charsB[i] = (char) (0x0280 + i); |
| } |
| } |
| |
| String stringA = new String(charsA); |
| String stringB = new String(charsB); |
| assertThat(stringA).isNotEqualTo(stringB); |
| assertThat(HASH_FN.hashUnencodedChars(stringA)) |
| .isNotEqualTo(HASH_FN.hashUnencodedChars(stringB)); |
| assertThat(fingerprint(stringA.getBytes(UTF_8))) |
| .isNotEqualTo(fingerprint(stringB.getBytes(UTF_8))); |
| |
| // ISO 8859-1 only has 0-255 (ubyte) representation so throws away UTF-8 characters |
| // greater than 127 (ie with their top bit set). |
| // Don't attempt to do this in real code. |
| assertEquals( |
| fingerprint(stringA.getBytes(ISO_8859_1)), fingerprint(stringB.getBytes(ISO_8859_1))); |
| } |
| |
| public void testPutNonChars() { |
| Hasher hasher = HASH_FN.newHasher(); |
| // Expected data is 0x0100010100000000 |
| hasher |
| .putBoolean(true) |
| .putBoolean(true) |
| .putBoolean(false) |
| .putBoolean(true) |
| .putBoolean(false) |
| .putBoolean(false) |
| .putBoolean(false) |
| .putBoolean(false); |
| final long hashCode = hasher.hash().asLong(); |
| |
| hasher = HASH_FN.newHasher(); |
| hasher |
| .putByte((byte) 0x01) |
| .putByte((byte) 0x01) |
| .putByte((byte) 0x00) |
| .putByte((byte) 0x01) |
| .putByte((byte) 0x00) |
| .putByte((byte) 0x00) |
| .putByte((byte) 0x00) |
| .putByte((byte) 0x00); |
| assertEquals(hashCode, hasher.hash().asLong()); |
| |
| hasher = HASH_FN.newHasher(); |
| hasher |
| .putChar((char) 0x0101) |
| .putChar((char) 0x0100) |
| .putChar((char) 0x0000) |
| .putChar((char) 0x0000); |
| assertEquals(hashCode, hasher.hash().asLong()); |
| |
| hasher = HASH_FN.newHasher(); |
| hasher.putBytes(new byte[] {0x01, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00}); |
| assertEquals(hashCode, hasher.hash().asLong()); |
| |
| hasher = HASH_FN.newHasher(); |
| hasher.putLong(0x0000000001000101L); |
| assertEquals(hashCode, hasher.hash().asLong()); |
| |
| hasher = HASH_FN.newHasher(); |
| hasher |
| .putShort((short) 0x0101) |
| .putShort((short) 0x0100) |
| .putShort((short) 0x0000) |
| .putShort((short) 0x0000); |
| assertEquals(hashCode, hasher.hash().asLong()); |
| } |
| |
| public void testHashFloatIsStable() { |
| // Just a spot check. Better than nothing. |
| Hasher hasher = HASH_FN.newHasher(); |
| hasher.putFloat(0x01000101f).putFloat(0f); |
| assertEquals(0x49f9d18ee8ae1b28L, hasher.hash().asLong()); |
| |
| hasher = HASH_FN.newHasher(); |
| hasher.putDouble(0x0000000001000101d); |
| assertEquals(0x388ee898bad75cbfL, hasher.hash().asLong()); |
| } |
| |
| /** Convenience method to compute a fingerprint on a full bytes array. */ |
| private static long fingerprint(byte[] bytes) { |
| return fingerprint(bytes, bytes.length); |
| } |
| |
| /** Convenience method to compute a fingerprint on a subset of a byte array. */ |
| private static long fingerprint(byte[] bytes, int length) { |
| return HASH_FN.hashBytes(bytes, 0, length).asLong(); |
| } |
| |
| /** |
| * Tests that the Java port of FarmHashFingerprint64 provides the same results on buffers up to |
| * 800 bytes long as the C++ reference implementation. |
| */ |
| public void testMultipleLengths() { |
| int iterations = 800; |
| byte[] buf = new byte[iterations * 4]; |
| int bufLen = 0; |
| long h = 0; |
| for (int i = 0; i < iterations; ++i) { |
| h ^= fingerprint(buf, i); |
| h = remix(h); |
| buf[bufLen++] = getChar(h); |
| |
| h ^= fingerprint(buf, i * i % bufLen); |
| h = remix(h); |
| buf[bufLen++] = getChar(h); |
| |
| h ^= fingerprint(buf, i * i * i % bufLen); |
| h = remix(h); |
| buf[bufLen++] = getChar(h); |
| |
| h ^= fingerprint(buf, bufLen); |
| h = remix(h); |
| buf[bufLen++] = getChar(h); |
| |
| int x0 = buf[bufLen - 1] & 0xff; |
| int x1 = buf[bufLen - 2] & 0xff; |
| int x2 = buf[bufLen - 3] & 0xff; |
| int x3 = buf[bufLen / 2] & 0xff; |
| buf[((x0 << 16) + (x1 << 8) + x2) % bufLen] ^= x3; |
| buf[((x1 << 16) + (x2 << 8) + x3) % bufLen] ^= i % 256; |
| } |
| assertEquals(0x7a1d67c50ec7e167L, h); |
| } |
| |
| private static long remix(long h) { |
| h ^= h >>> 41; |
| h *= 949921979; |
| return h; |
| } |
| |
| private static byte getChar(long h) { |
| return (byte) ('a' + ((h & 0xfffff) % 26)); |
| } |
| } |