| //----------------------------------------------------------------------------- |
| // Flipping a single bit of a key should cause an "avalanche" of changes in |
| // the hash function's output. Ideally, each output bits should flip 50% of |
| // the time - if the probability of an output bit flipping is not 50%, that bit |
| // is "biased". Too much bias means that patterns applied to the input will |
| // cause "echoes" of the patterns in the output, which in turn can cause the |
| // hash function to fail to create an even, random distribution of hash values. |
| |
| |
| #pragma once |
| |
| #include "Types.h" |
| #include "Random.h" |
| |
| #include <vector> |
| #include <stdio.h> |
| #include <math.h> |
| |
| // Avalanche fails if a bit is biased by more than 1% |
| |
| #define AVALANCHE_FAIL 0.01 |
| |
| double maxBias ( std::vector<int> & counts, int reps ); |
| |
| //----------------------------------------------------------------------------- |
| |
| template < typename keytype, typename hashtype > |
| void calcBias ( pfHash hash, std::vector<int> & counts, int reps, Rand & r ) |
| { |
| const int keybytes = sizeof(keytype); |
| const int hashbytes = sizeof(hashtype); |
| |
| const int keybits = keybytes * 8; |
| const int hashbits = hashbytes * 8; |
| |
| keytype K; |
| hashtype A,B; |
| |
| for(int irep = 0; irep < reps; irep++) |
| { |
| if(irep % (reps/10) == 0) printf("."); |
| |
| r.rand_p(&K,keybytes); |
| |
| hash(&K,keybytes,0,&A); |
| |
| int * cursor = &counts[0]; |
| |
| for(int iBit = 0; iBit < keybits; iBit++) |
| { |
| flipbit(&K,keybytes,iBit); |
| hash(&K,keybytes,0,&B); |
| flipbit(&K,keybytes,iBit); |
| |
| for(int iOut = 0; iOut < hashbits; iOut++) |
| { |
| int bitA = getbit(&A,hashbytes,iOut); |
| int bitB = getbit(&B,hashbytes,iOut); |
| |
| (*cursor++) += (bitA ^ bitB); |
| } |
| } |
| } |
| } |
| |
| //----------------------------------------------------------------------------- |
| |
| template < typename keytype, typename hashtype > |
| bool AvalancheTest ( pfHash hash, const int reps ) |
| { |
| Rand r(48273); |
| |
| const int keybytes = sizeof(keytype); |
| const int hashbytes = sizeof(hashtype); |
| |
| const int keybits = keybytes * 8; |
| const int hashbits = hashbytes * 8; |
| |
| printf("Testing %3d-bit keys -> %3d-bit hashes, %8d reps",keybits,hashbits,reps); |
| |
| //---------- |
| |
| std::vector<int> bins(keybits*hashbits,0); |
| |
| calcBias<keytype,hashtype>(hash,bins,reps,r); |
| |
| //---------- |
| |
| bool result = true; |
| |
| double b = maxBias(bins,reps); |
| |
| printf(" worst bias is %f%%",b * 100.0); |
| |
| if(b > AVALANCHE_FAIL) |
| { |
| printf(" !!!!! "); |
| result = false; |
| } |
| |
| printf("\n"); |
| |
| return result; |
| } |
| |
| //---------------------------------------------------------------------------- |
| // Tests the Bit Independence Criteron. Stricter than Avalanche, but slow and |
| // not really all that useful. |
| |
| template< typename keytype, typename hashtype > |
| void BicTest ( pfHash hash, const int keybit, const int reps, double & maxBias, int & maxA, int & maxB, bool verbose ) |
| { |
| Rand r(11938); |
| |
| const int keybytes = sizeof(keytype); |
| const int hashbytes = sizeof(hashtype); |
| const int hashbits = hashbytes * 8; |
| |
| std::vector<int> bins(hashbits*hashbits*4,0); |
| |
| keytype key; |
| hashtype h1,h2; |
| |
| for(int irep = 0; irep < reps; irep++) |
| { |
| if(verbose) |
| { |
| if(irep % (reps/10) == 0) printf("."); |
| } |
| |
| r.rand_p(&key,keybytes); |
| hash(&key,keybytes,0,&h1); |
| |
| flipbit(key,keybit); |
| hash(&key,keybytes,0,&h2); |
| |
| hashtype d = h1 ^ h2; |
| |
| for(int out1 = 0; out1 < hashbits; out1++) |
| for(int out2 = 0; out2 < hashbits; out2++) |
| { |
| if(out1 == out2) continue; |
| |
| uint32_t b = getbit(d,out1) | (getbit(d,out2) << 1); |
| |
| bins[(out1 * hashbits + out2) * 4 + b]++; |
| } |
| } |
| |
| if(verbose) printf("\n"); |
| |
| maxBias = 0; |
| |
| for(int out1 = 0; out1 < hashbits; out1++) |
| { |
| for(int out2 = 0; out2 < hashbits; out2++) |
| { |
| if(out1 == out2) |
| { |
| if(verbose) printf("\\"); |
| continue; |
| } |
| |
| double bias = 0; |
| |
| for(int b = 0; b < 4; b++) |
| { |
| double b2 = double(bins[(out1 * hashbits + out2) * 4 + b]) / double(reps / 2); |
| b2 = fabs(b2 * 2 - 1); |
| |
| if(b2 > bias) bias = b2; |
| } |
| |
| if(bias > maxBias) |
| { |
| maxBias = bias; |
| maxA = out1; |
| maxB = out2; |
| } |
| |
| if(verbose) |
| { |
| if (bias < 0.01) printf("."); |
| else if(bias < 0.05) printf("o"); |
| else if(bias < 0.33) printf("O"); |
| else printf("X"); |
| } |
| } |
| |
| if(verbose) printf("\n"); |
| } |
| } |
| |
| //---------- |
| |
| template< typename keytype, typename hashtype > |
| bool BicTest ( pfHash hash, const int reps ) |
| { |
| const int keybytes = sizeof(keytype); |
| const int keybits = keybytes * 8; |
| |
| double maxBias = 0; |
| int maxK = 0; |
| int maxA = 0; |
| int maxB = 0; |
| |
| for(int i = 0; i < keybits; i++) |
| { |
| if(i % (keybits/10) == 0) printf("."); |
| |
| double bias; |
| int a,b; |
| |
| BicTest<keytype,hashtype>(hash,i,reps,bias,a,b,true); |
| |
| if(bias > maxBias) |
| { |
| maxBias = bias; |
| maxK = i; |
| maxA = a; |
| maxB = b; |
| } |
| } |
| |
| printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB); |
| |
| // Bit independence is harder to pass than avalanche, so we're a bit more lax here. |
| |
| bool result = (maxBias < 0.05); |
| |
| return result; |
| } |
| |
| //----------------------------------------------------------------------------- |
| // BIC test variant - store all intermediate data in a table, draw diagram |
| // afterwards (much faster) |
| |
| template< typename keytype, typename hashtype > |
| void BicTest3 ( pfHash hash, const int reps, bool verbose = true ) |
| { |
| const int keybytes = sizeof(keytype); |
| const int keybits = keybytes * 8; |
| const int hashbytes = sizeof(hashtype); |
| const int hashbits = hashbytes * 8; |
| const int pagesize = hashbits*hashbits*4; |
| |
| Rand r(11938); |
| |
| double maxBias = 0; |
| int maxK = 0; |
| int maxA = 0; |
| int maxB = 0; |
| |
| keytype key; |
| hashtype h1,h2; |
| |
| std::vector<int> bins(keybits*pagesize,0); |
| |
| for(int keybit = 0; keybit < keybits; keybit++) |
| { |
| if(keybit % (keybits/10) == 0) printf("."); |
| |
| int * page = &bins[keybit*pagesize]; |
| |
| for(int irep = 0; irep < reps; irep++) |
| { |
| r.rand_p(&key,keybytes); |
| hash(&key,keybytes,0,&h1); |
| flipbit(key,keybit); |
| hash(&key,keybytes,0,&h2); |
| |
| hashtype d = h1 ^ h2; |
| |
| for(int out1 = 0; out1 < hashbits-1; out1++) |
| for(int out2 = out1+1; out2 < hashbits; out2++) |
| { |
| int * b = &page[(out1*hashbits+out2)*4]; |
| |
| uint32_t x = getbit(d,out1) | (getbit(d,out2) << 1); |
| |
| b[x]++; |
| } |
| } |
| } |
| |
| printf("\n"); |
| |
| for(int out1 = 0; out1 < hashbits-1; out1++) |
| { |
| for(int out2 = out1+1; out2 < hashbits; out2++) |
| { |
| if(verbose) printf("(%3d,%3d) - ",out1,out2); |
| |
| for(int keybit = 0; keybit < keybits; keybit++) |
| { |
| int * page = &bins[keybit*pagesize]; |
| int * bins = &page[(out1*hashbits+out2)*4]; |
| |
| double bias = 0; |
| |
| for(int b = 0; b < 4; b++) |
| { |
| double b2 = double(bins[b]) / double(reps / 2); |
| b2 = fabs(b2 * 2 - 1); |
| |
| if(b2 > bias) bias = b2; |
| } |
| |
| if(bias > maxBias) |
| { |
| maxBias = bias; |
| maxK = keybit; |
| maxA = out1; |
| maxB = out2; |
| } |
| |
| if(verbose) |
| { |
| if (bias < 0.01) printf("."); |
| else if(bias < 0.05) printf("o"); |
| else if(bias < 0.33) printf("O"); |
| else printf("X"); |
| } |
| } |
| |
| // Finished keybit |
| |
| if(verbose) printf("\n"); |
| } |
| |
| if(verbose) |
| { |
| for(int i = 0; i < keybits+12; i++) printf("-"); |
| printf("\n"); |
| } |
| } |
| |
| printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB); |
| } |
| |
| |
| //----------------------------------------------------------------------------- |
| // BIC test variant - iterate over output bits, then key bits. No temp storage, |
| // but slooooow |
| |
| template< typename keytype, typename hashtype > |
| void BicTest2 ( pfHash hash, const int reps, bool verbose = true ) |
| { |
| const int keybytes = sizeof(keytype); |
| const int keybits = keybytes * 8; |
| const int hashbytes = sizeof(hashtype); |
| const int hashbits = hashbytes * 8; |
| |
| Rand r(11938); |
| |
| double maxBias = 0; |
| int maxK = 0; |
| int maxA = 0; |
| int maxB = 0; |
| |
| keytype key; |
| hashtype h1,h2; |
| |
| for(int out1 = 0; out1 < hashbits-1; out1++) |
| for(int out2 = out1+1; out2 < hashbits; out2++) |
| { |
| if(verbose) printf("(%3d,%3d) - ",out1,out2); |
| |
| for(int keybit = 0; keybit < keybits; keybit++) |
| { |
| int bins[4] = { 0, 0, 0, 0 }; |
| |
| for(int irep = 0; irep < reps; irep++) |
| { |
| r.rand_p(&key,keybytes); |
| hash(&key,keybytes,0,&h1); |
| flipbit(key,keybit); |
| hash(&key,keybytes,0,&h2); |
| |
| hashtype d = h1 ^ h2; |
| |
| uint32_t b = getbit(d,out1) | (getbit(d,out2) << 1); |
| |
| bins[b]++; |
| } |
| |
| double bias = 0; |
| |
| for(int b = 0; b < 4; b++) |
| { |
| double b2 = double(bins[b]) / double(reps / 2); |
| b2 = fabs(b2 * 2 - 1); |
| |
| if(b2 > bias) bias = b2; |
| } |
| |
| if(bias > maxBias) |
| { |
| maxBias = bias; |
| maxK = keybit; |
| maxA = out1; |
| maxB = out2; |
| } |
| |
| if(verbose) |
| { |
| if (bias < 0.05) printf("."); |
| else if(bias < 0.10) printf("o"); |
| else if(bias < 0.50) printf("O"); |
| else printf("X"); |
| } |
| } |
| |
| // Finished keybit |
| |
| if(verbose) printf("\n"); |
| } |
| |
| printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB); |
| } |
| |
| //----------------------------------------------------------------------------- |