Merge remote-tracking branch 'origin/kitkat-dev'
diff --git a/AvalancheTest.cpp b/AvalancheTest.cpp
new file mode 100644
index 0000000..f5ea0df
--- /dev/null
+++ b/AvalancheTest.cpp
@@ -0,0 +1,56 @@
+#include "AvalancheTest.h"
+
+//-----------------------------------------------------------------------------
+// Renders the bin table as an x-column by y-row character grid, one glyph per bin.
+void PrintAvalancheDiagram ( int x, int y, int reps, double scale, int * bins )
+{
+  static const char glyphs[] = ".123456789X";
+
+  // Rows are emitted top-down, so the first printed row is bin row y-1.
+  for(int row = y - 1; row >= 0; row--)
+  {
+    printf("[");
+
+    for(int col = 0; col < x; col++)
+    {
+      // Fraction of reps recorded in this bin (bins is laid out column-major).
+      const double ratio = double(bins[row + (col*y)]) / double(reps);
+
+      // Distance from the ideal 50%, rescaled by the caller's factor.
+      const double bias = fabs(ratio*2 - 1) * scale;
+
+      // Map onto a glyph index, clamped to [0,10].
+      int level = (int)floor(bias*10);
+
+      if(level > 10)     level = 10;
+      else if(level < 0) level = 0;
+
+      printf("%c",glyphs[level]);
+    }
+
+    printf("]\n");
+  }
+}
+
+//----------------------------------------------------------------------------
+// Returns the largest |2*(count/reps) - 1| over all counters; 0 if counts is empty.
+double maxBias ( std::vector<int> & counts, int reps )
+{
+  const double total = double(reps);
+  double peak = 0;
+
+  // Walk every counter and keep the largest deviation from a 50% flip rate.
+  for(size_t idx = 0; idx != counts.size(); ++idx)
+  {
+    const double ratio     = double(counts[idx]) / total;
+    const double deviation = fabs(ratio * 2 - 1);
+
+    // Strict '>' keeps the running value when the deviation is not larger
+    // (including when it is NaN).
+    if(deviation > peak) peak = deviation;
+  }
+
+  return peak;
+}
+
+//-----------------------------------------------------------------------------
diff --git a/AvalancheTest.h b/AvalancheTest.h
new file mode 100644
index 0000000..f1bfeea
--- /dev/null
+++ b/AvalancheTest.h
@@ -0,0 +1,422 @@
+//-----------------------------------------------------------------------------
+// Flipping a single bit of a key should cause an "avalanche" of changes in
+// the hash function's output. Ideally, each output bit should flip 50% of
+// the time - if the probability of an output bit flipping is not 50%, that bit
+// is "biased". Too much bias means that patterns applied to the input will
+// cause "echoes" of the patterns in the output, which in turn can cause the
+// hash function to fail to create an even, random distribution of hash values.
+
+
+#pragma once
+
+#include "Types.h"
+#include "Random.h"
+
+#include <vector>
+#include <stdio.h>
+#include <math.h>
+
+// Avalanche fails if a bit is biased by more than 1%
+
+#define AVALANCHE_FAIL 0.01
+
+double maxBias ( std::vector<int> & counts, int reps );
+
+//-----------------------------------------------------------------------------
+// Adds to counts[keybit*hashbits + outbit] the number of the `reps` random keys for which flipping that key bit flipped that output bit.
+template < typename keytype, typename hashtype >
+void calcBias ( pfHash hash, std::vector<int> & counts, int reps, Rand & r )
+{
+  const int keybytes = sizeof(keytype);
+  const int hashbytes = sizeof(hashtype);
+
+  const int keybits = keybytes * 8;
+  const int hashbits = hashbytes * 8;
+
+  keytype K;
+  hashtype A,B;
+  const int tick = (reps >= 10) ? (reps/10) : 1;  // progress-dot interval; never 0, so reps < 10 no longer divides by zero
+  for(int irep = 0; irep < reps; irep++)
+  {
+    if(irep % tick == 0) printf(".");
+    // Fresh random key each rep; A is the hash of the unmodified key.
+    r.rand_p(&K,keybytes);
+
+    hash(&K,keybytes,0,&A);
+    // counts is walked linearly, so it must hold at least keybits*hashbits entries.
+    int * cursor = &counts[0];
+
+    for(int iBit = 0; iBit < keybits; iBit++)
+    {
+      flipbit(&K,keybytes,iBit);   // flip one key bit...
+      hash(&K,keybytes,0,&B);
+      flipbit(&K,keybytes,iBit);   // ...and restore it for the next iteration
+
+      for(int iOut = 0; iOut < hashbits; iOut++)
+      {
+        int bitA = getbit(&A,hashbytes,iOut);
+        int bitB = getbit(&B,hashbytes,iOut);
+        // Adds 1 when this output bit differs between the two hashes.
+        (*cursor++) += (bitA ^ bitB);
+      }
+    }
+  }
+}
+
+//-----------------------------------------------------------------------------
+// Runs the avalanche test for one key/hash width pair; returns true when the worst bias does not exceed AVALANCHE_FAIL.
+template < typename keytype, typename hashtype >
+bool AvalancheTest ( pfHash hash, const int reps )
+{
+  const int keybits  = (int)sizeof(keytype) * 8;
+  const int hashbits = (int)sizeof(hashtype) * 8;
+
+  // The generator is always constructed with the same constant seed.
+  Rand rng(48273);
+
+  printf("Testing %3d-bit keys -> %3d-bit hashes, %8d reps",keybits,hashbits,reps);
+
+  //----------
+  // One flip counter per (key bit, hash bit) pair, filled in by calcBias.
+
+  std::vector<int> flipCounts(keybits*hashbits,0);
+
+  calcBias<keytype,hashtype>(hash,flipCounts,reps,rng);
+
+  //----------
+  // Report the worst deviation from a 50% flip rate and judge it.
+
+  const double worst = maxBias(flipCounts,reps);
+
+  printf(" worst bias is %f%%",worst * 100.0);
+
+  const bool failed = (worst > AVALANCHE_FAIL);
+
+  if(failed)
+  {
+    printf(" !!!!! ");
+  }
+
+  printf("\n");
+
+  // A NaN bias compares false above and therefore counts as a pass.
+  return !failed;
+}
+
+//----------------------------------------------------------------------------
+// Tests the Bit Independence Criterion. Stricter than Avalanche, but slow and
+// not really all that useful.
+// This overload flips one key bit (keybit) and reports the worst output-bit pair via maxBias/maxA/maxB (maxA/maxB are left untouched if no bias exceeds 0).
+template< typename keytype, typename hashtype >
+void BicTest ( pfHash hash, const int keybit, const int reps, double & maxBias, int & maxA, int & maxB, bool verbose )
+{
+  Rand r(11938);
+
+  const int keybytes = sizeof(keytype);
+  const int hashbytes = sizeof(hashtype);
+  const int hashbits = hashbytes * 8;
+
+  std::vector<int> bins(hashbits*hashbits*4,0);  // four joint-outcome counters per ordered (out1,out2) pair
+
+  keytype key;
+  hashtype h1,h2;
+  const int tick = (reps >= 10) ? (reps/10) : 1;  // progress-dot interval; never 0, so reps < 10 no longer divides by zero
+  for(int irep = 0; irep < reps; irep++)
+  {
+    if(verbose)
+    {
+      if(irep % tick == 0) printf(".");
+    }
+
+    r.rand_p(&key,keybytes);
+    hash(&key,keybytes,0,&h1);
+
+    flipbit(key,keybit);
+    hash(&key,keybytes,0,&h2);
+
+    hashtype d = h1 ^ h2;  // set bits mark the output bits that flipped
+
+    for(int out1 = 0; out1 < hashbits; out1++)
+    for(int out2 = 0; out2 < hashbits; out2++)
+    {
+      if(out1 == out2) continue;
+      // b encodes the joint outcome: bit 0 = out1 flipped, bit 1 = out2 flipped.
+      uint32_t b = getbit(d,out1) | (getbit(d,out2) << 1);
+
+      bins[(out1 * hashbits + out2) * 4 + b]++;
+    }
+  }
+
+  if(verbose) printf("\n");
+
+  maxBias = 0;
+  const double halfReps = double(reps) / 2.0;  // floating-point half; the old integer reps/2 truncated for odd reps
+  for(int out1 = 0; out1 < hashbits; out1++)
+  {
+    for(int out2 = 0; out2 < hashbits; out2++)
+    {
+      if(out1 == out2)
+      {
+        if(verbose) printf("\\");
+        continue;
+      }
+
+      double bias = 0;
+      // Each of the four outcomes should occur in a quarter of the reps; keep the worst deviation.
+      for(int b = 0; b < 4; b++)
+      {
+        double b2 = double(bins[(out1 * hashbits + out2) * 4 + b]) / halfReps;
+        b2 = fabs(b2 * 2 - 1);
+
+        if(b2 > bias) bias = b2;
+      }
+
+      if(bias > maxBias)
+      {
+        maxBias = bias;
+        maxA = out1;
+        maxB = out2;
+      }
+
+      if(verbose) 
+      {
+        if     (bias < 0.01) printf(".");
+        else if(bias < 0.05) printf("o");
+        else if(bias < 0.33) printf("O");
+        else                 printf("X");
+      }
+    }
+
+    if(verbose) printf("\n");
+  }
+}
+
+//----------
+// Runs the single-key-bit BicTest for every key bit and returns true when the overall worst bias is below 5%.
+template< typename keytype, typename hashtype >
+bool BicTest ( pfHash hash, const int reps )
+{
+  const int keybytes = sizeof(keytype);
+  const int keybits = keybytes * 8;
+
+  double maxBias = 0;
+  int maxK = 0;
+  int maxA = 0;
+  int maxB = 0;
+  const int tick = (keybits >= 10) ? (keybits/10) : 1;  // keybits/10 is 0 for 1-byte keys, which made the modulo below divide by zero
+  for(int i = 0; i < keybits; i++)
+  {
+    if(i % tick == 0) printf(".");
+
+    double bias;
+    int a = 0, b = 0;  // the per-bit test leaves these untouched when it finds no bias above 0
+
+    BicTest<keytype,hashtype>(hash,i,reps,bias,a,b,true);
+
+    if(bias > maxBias)
+    {
+      maxBias = bias;
+      maxK = i;
+      maxA = a;
+      maxB = b;
+    }
+  }
+
+  printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
+
+  // Bit independence is harder to pass than avalanche, so we're a bit more lax here.
+
+  bool result = (maxBias < 0.05);
+
+  return result;
+}
+
+//-----------------------------------------------------------------------------
+// BIC test variant - store all intermediate data in a table, draw diagram
+// afterwards (much faster)
+// The table holds one page per key bit; each page has four outcome counters per (out1 < out2) pair.
+template< typename keytype, typename hashtype >
+void BicTest3 ( pfHash hash, const int reps, bool verbose = true )
+{
+  const int keybytes = sizeof(keytype);
+  const int keybits = keybytes * 8;
+  const int hashbytes = sizeof(hashtype);
+  const int hashbits = hashbytes * 8;
+  const int pagesize = hashbits*hashbits*4;
+
+  Rand r(11938);
+
+  double maxBias = 0;
+  int maxK = 0;
+  int maxA = 0;
+  int maxB = 0;
+  const int tick = (keybits >= 10) ? (keybits/10) : 1;  // keybits/10 is 0 for 1-byte keys, which made the modulo below divide by zero
+  keytype key;
+  hashtype h1,h2;
+
+  std::vector<int> bins(keybits*pagesize,0);
+
+  for(int keybit = 0; keybit < keybits; keybit++)
+  {
+    if(keybit % tick == 0) printf(".");
+
+    int * page = &bins[keybit*pagesize];
+
+    for(int irep = 0; irep < reps; irep++)
+    {
+      r.rand_p(&key,keybytes);
+      hash(&key,keybytes,0,&h1);
+      flipbit(key,keybit);
+      hash(&key,keybytes,0,&h2);
+
+      hashtype d = h1 ^ h2;  // set bits mark the output bits that flipped
+
+      for(int out1 = 0; out1 < hashbits-1; out1++)
+      for(int out2 = out1+1; out2 < hashbits; out2++)
+      {
+        int * b = &page[(out1*hashbits+out2)*4];
+        // x encodes the joint outcome: bit 0 = out1 flipped, bit 1 = out2 flipped.
+        uint32_t x = getbit(d,out1) | (getbit(d,out2) << 1);
+
+        b[x]++;
+      }
+    }
+  }
+
+  printf("\n");
+  const double halfReps = double(reps) / 2.0;  // floating-point half; the old integer reps/2 truncated for odd reps
+  for(int out1 = 0; out1 < hashbits-1; out1++)
+  {
+    for(int out2 = out1+1; out2 < hashbits; out2++)
+    {
+      if(verbose) printf("(%3d,%3d) - ",out1,out2);
+
+      for(int keybit = 0; keybit < keybits; keybit++)
+      {
+        int * page = &bins[keybit*pagesize];
+        int * cell = &page[(out1*hashbits+out2)*4];  // renamed: this pointer used to shadow the `bins` vector
+
+        double bias = 0;
+
+        for(int b = 0; b < 4; b++)
+        {
+          double b2 = double(cell[b]) / halfReps;
+          b2 = fabs(b2 * 2 - 1);
+
+          if(b2 > bias) bias = b2;
+        }
+
+        if(bias > maxBias)
+        {
+          maxBias = bias;
+          maxK = keybit;
+          maxA = out1;
+          maxB = out2;
+        }
+
+        if(verbose) 
+        {
+          if     (bias < 0.01) printf(".");
+          else if(bias < 0.05) printf("o");
+          else if(bias < 0.33) printf("O");
+          else                 printf("X");
+        }
+      }
+
+      // Finished keybit
+
+      if(verbose) printf("\n");
+    }
+
+    if(verbose)
+    {
+      for(int i = 0; i < keybits+12; i++) printf("-");
+      printf("\n");
+    }
+  }
+
+  printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
+}
+
+
+//-----------------------------------------------------------------------------
+// BIC test variant - iterate over output bits, then key bits. No temp storage,
+// but slooooow
+// Every (out1 < out2, keybit) combination draws its own `reps` random keys.
+template< typename keytype, typename hashtype >
+void BicTest2 ( pfHash hash, const int reps, bool verbose = true )
+{
+  const int keybytes = sizeof(keytype);
+  const int keybits = keybytes * 8;
+  const int hashbytes = sizeof(hashtype);
+  const int hashbits = hashbytes * 8;
+
+  Rand r(11938);
+
+  double maxBias = 0;
+  int maxK = 0;
+  int maxA = 0;
+  int maxB = 0;
+
+  keytype key;
+  hashtype h1,h2;
+  const double halfReps = double(reps) / 2.0;  // floating-point half; the old integer reps/2 truncated for odd reps
+  for(int out1 = 0; out1 < hashbits-1; out1++)
+  for(int out2 = out1+1; out2 < hashbits; out2++)
+  {
+    if(verbose) printf("(%3d,%3d) - ",out1,out2);
+
+    for(int keybit = 0; keybit < keybits; keybit++)
+    {
+      int bins[4] = { 0, 0, 0, 0 };
+
+      for(int irep = 0; irep < reps; irep++)
+      {
+        r.rand_p(&key,keybytes);
+        hash(&key,keybytes,0,&h1);
+        flipbit(key,keybit);
+        hash(&key,keybytes,0,&h2);
+
+        hashtype d = h1 ^ h2;  // set bits mark the output bits that flipped
+        // b encodes the joint outcome: bit 0 = out1 flipped, bit 1 = out2 flipped.
+        uint32_t b = getbit(d,out1) | (getbit(d,out2) << 1);
+
+        bins[b]++;
+      }
+
+      double bias = 0;
+
+      for(int b = 0; b < 4; b++)
+      {
+        double b2 = double(bins[b]) / halfReps;
+        b2 = fabs(b2 * 2 - 1);
+
+        if(b2 > bias) bias = b2;
+      }
+
+      if(bias > maxBias)
+      {
+        maxBias = bias;
+        maxK = keybit;
+        maxA = out1;
+        maxB = out2;
+      }
+
+      if(verbose) 
+      {
+        if     (bias < 0.05) printf(".");
+        else if(bias < 0.10) printf("o");
+        else if(bias < 0.50) printf("O");
+        else                 printf("X");
+      }
+    }
+
+    // Finished keybit
+
+    if(verbose) printf("\n");
+  }
+
+  printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
+}
+
+//-----------------------------------------------------------------------------
diff --git a/Bitslice.cpp b/Bitslice.cpp
new file mode 100644
index 0000000..45a2249
--- /dev/null
+++ b/Bitslice.cpp
@@ -0,0 +1,127 @@
+#include "Bitvec.h"
+#include <vector>
+#include <assert.h>
+
+// handle xnor
+
+typedef std::vector<uint32_t> slice;
+typedef std::vector<slice> slice_vec;
+// Total number of set bits across every 32-bit word of the slice.
+int countbits ( slice & v )
+{
+  int total = 0;
+
+  // Sum the per-word bit counts (uint32_t overload of countbits).
+  for(slice::iterator word = v.begin(); word != v.end(); ++word)
+  {
+    total += countbits(*word);
+  }
+
+  // An empty slice yields 0.
+  return total;
+}
+// Number of bit positions in which slices a and b differ (bit count of a XOR b).
+int countxor ( slice & a, slice & b )
+{
+  assert(a.size() == b.size());
+
+  const size_t nwords = a.size();
+  int differing = 0;
+  // XOR word by word and count the set bits of each result.
+  for(size_t w = 0; w < nwords; w++)
+  {
+    const uint32_t diff = a[w] ^ b[w];
+    differing += countbits(diff);
+  }
+
+  return differing;
+}
+// In-place a ^= b, word by word; both slices are expected to have the same length.
+void xoreq ( slice & a, slice & b )
+{
+  assert(a.size() == b.size());
+  slice::const_iterator src = b.begin();
+  for(slice::iterator dst = a.begin(); dst != a.end(); ++dst, ++src)
+  {
+    *dst ^= *src;
+  }
+}
+
+//-----------------------------------------------------------------------------
+// Bitslice a hash set: slices[j] becomes a bit vector whose bit i is bit j of hashes[i].
+// Each slice is ceil(hashes.size()/32) words long; unused tail bits stay 0.
+template< typename hashtype >
+void Bitslice ( std::vector<hashtype> & hashes, slice_vec & slices )
+{
+  const int hashbytes = sizeof(hashtype);
+  const int hashbits = hashbytes * 8;
+  const int slicelen = ((int)hashes.size() + 31) / 32;
+
+  slices.clear();
+  slices.resize(hashbits);
+
+  for(int i = 0; i < (int)slices.size(); i++)
+  {
+    slices[i].resize(slicelen,0);
+  }
+  if(slicelen == 0) return;  // no hashes: the slices are empty and &slices[j][0] below would be undefined
+  for(int j = 0; j < hashbits; j++)
+  {
+    void * sliceblob = &(slices[j][0]);
+
+    for(int i = 0; i < (int)hashes.size(); i++)
+    {
+      int b = getbit(hashes[i],j);
+      // slicelen*4 is the slice size in bytes, which is what the byte-addressed setbit takes.
+      setbit(sliceblob,slicelen*4,i,b);
+    }
+  }
+}
+// Work in progress: scans all slice pairs for ones whose XOR has fewer set bits than either slice, but does not yet modify `slices`.
+void FactorSlices ( slice_vec & slices )
+{
+  std::vector<int> counts(slices.size(),0);
+
+  for(size_t i = 0; i < slices.size(); i++)
+  {
+    counts[i] = countbits(slices[i]);
+  }
+
+  bool changed = true;
+
+  while(changed)
+  {
+    int bestA = -1;
+    int bestB = -1;
+    changed = false;  // nothing in this pass rewrites a slice yet; without this reset the loop never ended
+    for(int j = 0; j < (int)slices.size()-1; j++)
+    {
+      for(int i = j+1; i < (int)slices.size(); i++)
+      {
+        int d = countxor(slices[i],slices[j]);
+
+        if((d < counts[i]) && (d < counts[j]))
+        {
+          if(counts[i] < counts[j])
+          {
+            bestA = j;
+            bestB = i;
+          }
+        }
+        else if(d < counts[i])
+        {
+          //bestA = 
+        }
+      }
+    }
+  }
+}
+
+// Calls Bitslice on an empty uint32_t hash list, which instantiates the template for that type.
+void foo ( void )
+{
+  slice_vec slices;
+  slice hashes;
+  // Both containers are passed empty; Bitslice clears and resizes `slices` itself.
+  Bitslice(hashes,slices);
+}
\ No newline at end of file
diff --git a/Bitvec.cpp b/Bitvec.cpp
new file mode 100644
index 0000000..4855f8f
--- /dev/null
+++ b/Bitvec.cpp
@@ -0,0 +1,757 @@
+#include "Bitvec.h"
+
+#include "Random.h"
+
+#include <assert.h>
+#include <stdio.h>
+// Unless DEBUG is defined, replace the <assert.h> macro with an empty function: assert(...) arguments are still evaluated, but nothing is checked.
+#ifndef DEBUG
+#undef assert
+void assert ( bool )
+{
+}
+#endif
+
+//----------------------------------------------------------------------------
+// Prints the blob as bracketed, space-separated hex byte pairs, showing each zero nibble as '.'.
+void printbits ( const void * blob, int len )
+{
+  const uint8_t * bytes = (const uint8_t *)blob;
+
+  printf("[");
+
+  for(int pos = 0; pos < len; pos++)
+  {
+    // High nibble first, then low nibble.
+    const int nibbles[2] = { bytes[pos] >> 4, bytes[pos] & 0xF };
+
+    for(int n = 0; n < 2; n++)
+    {
+      if(nibbles[n] == 0) printf(".");
+      else                printf("%01x",nibbles[n]);
+    }
+
+    // Single space between bytes, none after the final one.
+    if(pos != len-1) printf(" ");
+  }
+  printf("]");
+}
+// Prints the bytes from last to first, each bit MSB-first, as '#' (set) or ' ' (clear), inside brackets.
+void printbits2 ( const uint8_t * k, int nbytes )
+{
+  printf("[");
+
+  int idx = nbytes;
+
+  while(idx-- > 0)
+  {
+    // Slide a one-bit mask from bit 7 down to bit 0 of this byte.
+    for(int mask = 0x80; mask != 0; mask >>= 1)
+    {
+      const uint8_t glyph = (k[idx] & mask) ? '#' : ' ';
+      putc(glyph,stdout);
+    }
+  }
+  printf("]");
+}
+// Prints the blob as a brace-wrapped list of 32-bit hex words; len is given in bytes.
+void printhex32 ( const void * blob, int len )
+{
+  assert((len & 3) == 0);
+
+  const uint32_t * words = (const uint32_t*)blob;
+  const int nwords = len/4;
+
+  printf("{ ");
+  // Every word, including the last, is followed by ", ".
+  for(int w = 0; w < nwords; ++w)
+  {
+    printf("0x%08x, ",words[w]);
+  }
+  printf("}");
+}
+// Prints the blob as a C-style initializer list of hex bytes: "{ 0x.., 0x.., ...  };".
+void printbytes ( const void * blob, int len )
+{
+  const uint8_t * cur = (const uint8_t*)blob;
+
+  printf("{ ");
+  // Count the remaining bytes down while walking the pointer forward.
+  for(int left = len; left > 0; left--)
+  {
+    printf("0x%02x, ",*cur++);
+  }
+
+  printf(" };");
+}
+// Prints each byte of the blob as two lowercase hex digits followed by a space; no brackets, no newline.
+void printbytes2 ( const void * blob, int len )
+{
+  const uint8_t * bytes = (const uint8_t*)blob;
+  int at = 0;
+  while(at < len)
+  {
+    printf("%02x ",bytes[at++]);
+  }
+}
+
+//-----------------------------------------------------------------------------
+// Bit-level manipulation
+
+// These two are from the "Bit Twiddling Hacks" webpage
+// Returns the number of set bits in v (parallel bit count).
+uint32_t popcount ( uint32_t v )
+{
+	v = v - ((v >> 1) & 0x55555555);                    // each 2-bit field now holds its own bit count
+	v = (v & 0x33333333) + ((v >> 2) & 0x33333333);     // each 4-bit field now holds its bit count
+	v = (v + (v >> 4)) & 0xF0F0F0F;                     // each byte holds its count; the mask must cover the SUM
+	// Masking only the shifted term left the high nibbles in place, so e.g. popcount(0x10) came out as 17.
+	return (v * 0x1010101) >> 24;                       // top byte = sum of the four byte counts
+}
+// Returns 1 when v has an odd number of set bits, 0 when even.
+uint32_t parity ( uint32_t v )
+{
+	v ^= v >> 1;                             // bit i = b[i] ^ b[i+1]
+	v ^= v >> 2;                             // bit i = XOR of b[i..i+3]; the low bit of each nibble is that nibble's parity
+	v = (v & 0x11111111U) * 0x11111111U;     // keep one parity bit per nibble; the multiply adds all eight into the top nibble
+	return (v >> 28) & 1;                    // low bit of that sum is the overall parity
+}
+
+//-----------------------------------------------------------------------------
+// Returns bit number `bit` of the blob (bit 0 = least significant bit of byte 0); bits past the end read as 0.
+uint32_t getbit ( const void * block, int len, uint32_t bit )
+{
+  const uint8_t * bytes = (const uint8_t*)block;
+
+  const int index = bit >> 3;         // byte that holds the bit
+  const uint32_t shift = bit & 0x7;   // position inside that byte
+
+  // Out-of-range requests read as 0 instead of touching memory.
+  if(index >= len) return 0;
+  return (bytes[index] >> shift) & 1;
+}
+// Like getbit, but the byte index wraps modulo len instead of reading 0 past the end.
+uint32_t getbit_wrap ( const void * block, int len, uint32_t bit )
+{
+  const uint8_t * bytes = (const uint8_t*)block;
+
+  // The byte index wraps around the blob length (len must be non-zero).
+  const int index = (int)(bit >> 3) % len;
+  const uint32_t shift = bit & 0x7;
+
+  const uint32_t value = bytes[index];
+  return (value >> shift) & 1;
+}
+// Sets bit number `bit` of the blob to 1; requests past the end are ignored.
+void setbit ( void * block, int len, uint32_t bit )
+{
+  uint8_t * bytes = (uint8_t*)block;
+  const int index = bit >> 3;
+
+  if(index >= len) return;
+
+  bytes[index] |= (uint8_t)(1 << (bit & 0x7));
+}
+// Writes val (treated as a boolean) into bit number `bit`: non-zero sets it, zero clears it.
+void setbit ( void * block, int len, uint32_t bit, uint32_t val )
+{
+  if(val) setbit(block,len,bit); else clearbit(block,len,bit);
+}
+// Clears bit number `bit` of the blob to 0; requests past the end are ignored.
+void clearbit ( void * block, int len, uint32_t bit )
+{
+  uint8_t * bytes = (uint8_t*)block;
+  const int index = bit >> 3;
+
+  if(index >= len) return;
+
+  bytes[index] &= (uint8_t)~(1 << (bit & 0x7));
+}
+// Inverts bit number `bit` of the blob; requests past the end are ignored.
+void flipbit ( void * block, int len, uint32_t bit )
+{
+  uint8_t * bytes = (uint8_t*)block;
+  const int index = bit >> 3;
+
+  if(index >= len) return;
+
+  bytes[index] ^= (uint8_t)(1 << (bit & 0x7));
+}
+
+// from the "Bit Twiddling Hacks" webpage
+// Returns the number of set bits in v as an int.
+int countbits ( uint32_t v )
+{
+  v = v - ((v >> 1) & 0x55555555);                    // each 2-bit field now holds its own bit count
+  v = (v & 0x33333333) + ((v >> 2) & 0x33333333);     // each 4-bit field now holds its bit count
+  v = (v + (v >> 4)) & 0xF0F0F0F;                     // each byte holds its count; the mask applies to the sum
+  // The old form masked only (v >> 4), so leftover high nibbles inflated the result (0x10 gave 17).
+  return (int)((v * 0x1010101) >> 24);                // top byte = sum of the four byte counts
+}
+
+//-----------------------------------------------------------------------------
+// Shifts the blob left by c bits, one bit at a time; vacated low bits become 0.
+void lshift1 ( void * blob, int len, int c )
+{
+  int dst = len*8;
+  // Descending, so each source bit (dst-c) is read before it is overwritten; a negative source index converts to a huge unsigned value that getbit reads as 0.
+  while(dst-- > 0)
+  {
+    setbit(blob,len,dst,getbit(blob,len,dst-c));
+  }
+}
+
+// Shifts the blob left by c bits: whole bytes are moved first, then the remaining 0-7 bits are carried across byte boundaries.
+void lshift8 ( void * blob, int nbytes, int c )
+{
+  uint8_t * k = (uint8_t*)blob;
+
+  if(c == 0) return;
+
+  int b = c >> 3;
+  c &= 7;
+  if(b > nbytes) b = nbytes;  // a shift wider than the blob just clears it; unclamped, the zero-fill loop wrote past the end
+  for(int i = nbytes-1; i >= b; i--)
+  {
+    k[i] = k[i-b];
+  }
+  // The b lowest bytes have no source and become zero.
+  for(int i = b-1; i >= 0; i--)
+  {
+    k[i] = 0;
+  }
+
+  if(c == 0) return;
+  // Top byte first, so k[i-1] is still unshifted when it supplies the carried-in bits.
+  for(int i = nbytes-1; i >= 0; i--)
+  {
+    uint8_t a = k[i];
+    uint8_t b = (i == 0) ? 0 : k[i-1];
+
+    k[i] = (a << c) | (b >> (8-c));
+  }
+}
+// Shifts the blob left by c bits, working in 32-bit words; len is in bytes and must be a multiple of 4.
+void lshift32 ( void * blob, int len, int c )
+{
+  assert((len & 3) == 0);
+
+  int nbytes  = len;
+  int ndwords = nbytes / 4;
+
+  uint32_t * k = reinterpret_cast<uint32_t*>(blob);
+
+  if(c == 0) return;
+
+  //----------
+
+  int b = c / 32;
+  c &= (32-1);
+  if(b > ndwords) b = ndwords;  // a shift wider than the blob just clears it; unclamped, the zero-fill loop wrote past the end
+  for(int i = ndwords-1; i >= b; i--)
+  {
+    k[i] = k[i-b];
+  }
+  // The b lowest words have no source and become zero.
+  for(int i = b-1; i >= 0; i--)
+  {
+    k[i] = 0;
+  }
+
+  if(c == 0) return;
+  // Top word first, so k[i-1] is still unshifted when it supplies the carried-in bits.
+  for(int i = ndwords-1; i >= 0; i--)
+  {
+    uint32_t a = k[i];
+    uint32_t b = (i == 0) ? 0 : k[i-1];
+
+    k[i] = (a << c) | (b >> (32-c));
+  }
+}
+
+//-----------------------------------------------------------------------------
+// Shifts the blob right by c bits, one bit at a time; vacated high bits become 0.
+void rshift1 ( void * blob, int len, int c )
+{
+  const int total = len*8;
+  int dst = 0;
+
+  // Ascending order, so each source bit (dst+c) is read before it is overwritten;
+  // sources past the end of the blob read as 0 via getbit.
+  for(; dst < total; ++dst) setbit(blob,len,dst,getbit(blob,len,dst+c));
+}
+// Shifts the blob right by c bits: whole bytes are moved first, then the remaining 0-7 bits are carried across byte boundaries.
+void rshift8 ( void * blob, int nbytes, int c )
+{
+  uint8_t * k = (uint8_t*)blob;
+
+  if(c == 0) return;
+
+  int b = c >> 3;
+  c &= 7;
+  if(b > nbytes) b = nbytes;  // a shift wider than the blob just clears it; unclamped, the zero-fill loop started at a negative index
+  for(int i = 0; i < nbytes-b; i++)
+  {
+    k[i] = k[i+b];
+  }
+  // The b highest bytes have no source and become zero.
+  for(int i = nbytes-b; i < nbytes; i++)
+  {
+    k[i] = 0;
+  }
+
+  if(c == 0) return;
+  // Bottom byte first, so k[i+1] is still unshifted when it supplies the carried-in bits.
+  for(int i = 0; i < nbytes; i++)
+  {
+    uint8_t a = (i == nbytes-1) ? 0 : k[i+1];
+    uint8_t b = k[i];
+
+    k[i] = (a << (8-c) ) | (b >> c);
+  }
+}
+// Shifts the blob right by c bits, working in 32-bit words; len is in bytes and must be a multiple of 4.
+void rshift32 ( void * blob, int len, int c )
+{
+  assert((len & 3) == 0);
+
+  int nbytes  = len;
+  int ndwords = nbytes / 4;
+
+  uint32_t * k = (uint32_t*)blob;
+
+  //----------
+
+  if(c == 0) return;
+
+  int b = c / 32;
+  c &= (32-1);
+  if(b > ndwords) b = ndwords;  // a shift wider than the blob just clears it; unclamped, the zero-fill loop started at a negative index
+  for(int i = 0; i < ndwords-b; i++)
+  {
+    k[i] = k[i+b];
+  }
+  // The b highest words have no source and become zero.
+  for(int i = ndwords-b; i < ndwords; i++)
+  {
+    k[i] = 0;
+  }
+
+  if(c == 0) return;
+  // Bottom word first, so k[i+1] is still unshifted when it supplies the carried-in bits.
+  for(int i = 0; i < ndwords; i++)
+  {
+    uint32_t a = (i == ndwords-1) ? 0 : k[i+1];
+    uint32_t b = k[i];
+
+    k[i] = (a << (32-c) ) | (b >> c);
+  }
+}
+
+//-----------------------------------------------------------------------------
+// Rotates the blob left by c bits by repeating a one-bit rotate c times.
+void lrot1 ( void * blob, int len, int c )
+{
+  const uint32_t topbit = len*8 - 1;
+
+  for(int step = c; step > 0; step--)
+  {
+    // Save the bit that is about to fall off the top...
+    const uint32_t carry = getbit(blob,len,topbit);
+    lshift1(blob,len,1);
+
+    // ...and bring it back in at bit 0.
+    setbit(blob,len,0,carry);
+  }
+}
+// Rotates the blob left by c bits: first c/8 whole-byte rotations, then a carry pass for the remaining 0-7 bits.
+void lrot8 ( void * blob, int len, int c )
+{
+  int nbytes  = len;
+
+  uint8_t * k = (uint8_t*)blob;
+
+  if(c == 0) return;
+
+  //----------
+
+  int b = c / 8;
+  c &= (8-1);
+  // Each pass moves every byte up one position and wraps the top byte around to index 0.
+  for(int j = 0; j < b; j++)
+  {
+    uint8_t t = k[nbytes-1];
+
+    for(int i = nbytes-1; i > 0; i--)
+    {
+      k[i] = k[i-1];
+    }
+
+    k[0] = t;
+  }
+
+  uint8_t t = k[nbytes-1];  // top byte, saved because its high bits wrap into byte 0 below
+
+  if(c == 0) return;
+  // Top byte first, so k[i-1] is still unshifted when it supplies the carried-in bits.
+  for(int i = nbytes-1; i >= 0; i--)
+  {
+    uint8_t a = k[i];
+    uint8_t b = (i == 0) ? t : k[i-1];
+
+    k[i] = (a << c) | (b >> (8-c));
+  }
+}
+// Rotates the blob left by c bits in 32-bit words (len in bytes, multiple of 4): c/32 whole-word rotations, then a carry pass.
+void lrot32 ( void * blob, int len, int c )
+{
+  assert((len & 3) == 0);
+
+  int nbytes  = len;
+  int ndwords = nbytes/4;
+
+  uint32_t * k = (uint32_t*)blob;
+
+  if(c == 0) return;
+
+  //----------
+
+  int b = c / 32;
+  c &= (32-1);
+  // Each pass moves every word up one position and wraps the top word around to index 0.
+  for(int j = 0; j < b; j++)
+  {
+    uint32_t t = k[ndwords-1];
+
+    for(int i = ndwords-1; i > 0; i--)
+    {
+      k[i] = k[i-1];
+    }
+
+    k[0] = t;
+  }
+
+  uint32_t t = k[ndwords-1];  // top word, saved because its high bits wrap into word 0 below
+
+  if(c == 0) return;
+  // Top word first, so k[i-1] is still unshifted when it supplies the carried-in bits.
+  for(int i = ndwords-1; i >= 0; i--)
+  {
+    uint32_t a = k[i];
+    uint32_t b = (i == 0) ? t : k[i-1];
+
+    k[i] = (a << c) | (b >> (32-c));
+  }
+}
+
+//-----------------------------------------------------------------------------
+// Rotates the blob right by c bits by repeating a one-bit rotate c times.
+void rrot1 ( void * blob, int len, int c )
+{
+  const uint32_t topbit = len*8 - 1;
+
+  for(int step = c; step > 0; step--)
+  {
+    // Save the bit that is about to fall off the bottom...
+    const uint32_t carry = getbit(blob,len,0);
+    rshift1(blob,len,1);
+
+    // ...and bring it back in at the top bit.
+    setbit(blob,len,topbit,carry);
+  }
+}
+// Rotates the blob right by c bits: first c/8 whole-byte rotations, then a carry pass for the remaining 0-7 bits.
+void rrot8 ( void * blob, int len, int c )
+{
+  int nbytes  = len;
+
+  uint8_t * k = (uint8_t*)blob;
+
+  if(c == 0) return;
+
+  //----------
+
+  int b = c / 8;
+  c &= (8-1);
+  // Each pass moves every byte down one position and wraps byte 0 around to the top.
+  for(int j = 0; j < b; j++)
+  {
+    uint8_t t = k[0];
+
+    for(int i = 0; i < nbytes-1; i++)
+    {
+      k[i] = k[i+1];
+    }
+
+    k[nbytes-1] = t;
+  }
+
+  if(c == 0) return;
+
+  //----------
+
+  uint8_t t = k[0];  // bottom byte, saved because its low bits wrap into the top byte below
+  // Bottom byte first, so k[i+1] is still unshifted when it supplies the carried-in bits.
+  for(int i = 0; i < nbytes; i++)
+  {
+    uint8_t a = (i == nbytes-1) ? t : k[i+1];
+    uint8_t b = k[i];
+
+    k[i] = (a << (8-c)) | (b >> c);
+  }
+}
+// Rotates the blob right by c bits in 32-bit words (len in bytes, multiple of 4): c/32 whole-word rotations, then a carry pass.
+void rrot32 ( void * blob, int len, int c )
+{
+  assert((len & 3) == 0);
+
+  int nbytes  = len;
+  int ndwords = nbytes/4;
+
+  uint32_t * k = (uint32_t*)blob;
+
+  if(c == 0) return;
+
+  //----------
+
+  int b = c / 32;
+  c &= (32-1);
+  // Each pass moves every word down one position and wraps word 0 around to the top.
+  for(int j = 0; j < b; j++)
+  {
+    uint32_t t = k[0];
+
+    for(int i = 0; i < ndwords-1; i++)
+    {
+      k[i] = k[i+1];
+    }
+
+    k[ndwords-1] = t;
+  }
+
+  if(c == 0) return;
+
+  //----------
+
+  uint32_t t = k[0];  // bottom word, saved because its low bits wrap into the top word below
+  // Bottom word first, so k[i+1] is still unshifted when it supplies the carried-in bits.
+  for(int i = 0; i < ndwords; i++)
+  {
+    uint32_t a = (i == ndwords-1) ? t : k[i+1];
+    uint32_t b = k[i];
+
+    k[i] = (a << (32-c)) | (b >> c);
+  }
+}
+
+//-----------------------------------------------------------------------------
+// Bit-at-a-time window: gathers `count` bits starting at bit `start`, wrapping past the end of the blob.
+uint32_t window1 ( void * blob, int len, int start, int count )
+{
+  uint32_t result = 0;
+  // Reduce the start position into the blob's bit range first.
+  const int first = start % (len*8);
+
+  for(int n = 0; n < count; n++)
+  {
+    const uint32_t bit = getbit_wrap(blob,len,first+n);
+    setbit(&result,sizeof(result),n,bit);
+  }
+
+  return result;
+}
+// Byte-wise window: returns `count` bits of the blob starting at bit `start`, wrapping past the end. count must be below 32.
+uint32_t window8 ( void * blob, int len, int start, int count )
+{
+  int nbits = len*8;
+  start %= nbits;
+
+  uint32_t t = 0;
+  uint8_t * k = (uint8_t*)blob;
+
+  if(count == 0) return 0;
+
+  int c = start & (8-1);   // bit offset inside the first byte
+  int d = start / 8;       // index of the first byte
+  // Assemble 32 bits, one output byte per pass, from each pair of neighbouring (wrapping) source bytes.
+  for(int i = 0; i < 4; i++)
+  {
+    int ia = (i + d + 1) % len;
+    int ib = (i + d + 0) % len;
+
+    uint32_t a = k[ia];
+    uint32_t b = k[ib];
+    
+    uint32_t m = (a << (8-c)) | (b >> c);
+
+    t |= (m << (8*i));
+
+  }
+  // Unsigned mask: the old (1 << count)-1 overflowed a signed int when count was 31.
+  t &= ((uint32_t(1) << count)-1);
+
+  return t;
+}
+// Word-wise window: returns `count` bits starting at bit `start`, wrapping past the end. len (bytes) must be a multiple of 4, count below 32.
+uint32_t window32 ( void * blob, int len, int start, int count )
+{
+  int nbits = len*8;
+  start %= nbits;
+
+  assert((len & 3) == 0);
+
+  int ndwords = len / 4;
+
+  uint32_t * k = (uint32_t*)blob;
+
+  if(count == 0) return 0;
+  const uint32_t mask = (uint32_t(1) << count) - 1;  // unsigned: the old (1 << count)-1 overflowed a signed int at count 31
+  int c = start & (32-1);   // bit offset inside the first word
+  int d = start / 32;       // index of the first word
+  // Word-aligned start: the window lies entirely inside k[d].
+  if(c == 0) return (k[d] & mask);
+
+  int ia = (d + 1) % ndwords;
+  int ib = (d + 0) % ndwords;
+
+  uint32_t a = k[ia];
+  uint32_t b = k[ib];
+  // Low bits come from word d, the rest from the next (wrapping) word.
+  uint32_t t = (a << (32-c)) | (b >> c);
+
+  t &= mask;
+
+  return t;
+}
+
+//-----------------------------------------------------------------------------
+// Cross-checks the 1/8/32-bit shift and rotate routines against native 64-bit operations on random values.
+bool test_shift ( void )
+{
+  Rand r(1123);
+
+  int nbits   = 64;
+  int nbytes  = nbits / 8;
+  int reps = 10000;
+
+  for(int j = 0; j < reps; j++)
+  {
+    if(j % (reps/10) == 0) printf(".");
+
+    uint64_t a = r.rand_u64();
+    uint64_t b;
+    // Every shift count from 0 to 63 is tried; b is reset to a before each call.
+    for(int i = 0; i < nbits; i++)
+    {
+      b = a; lshift1  (&b,nbytes,i);  assert(b == (a << i));
+      b = a; lshift8  (&b,nbytes,i);  assert(b == (a << i));
+      b = a; lshift32 (&b,nbytes,i);  assert(b == (a << i));
+
+      b = a; rshift1  (&b,nbytes,i);  assert(b == (a >> i));
+      b = a; rshift8  (&b,nbytes,i);  assert(b == (a >> i));
+      b = a; rshift32 (&b,nbytes,i);  assert(b == (a >> i));
+
+      b = a; lrot1    (&b,nbytes,i);  assert(b == ROTL64(a,i));
+      b = a; lrot8    (&b,nbytes,i);  assert(b == ROTL64(a,i));
+      b = a; lrot32   (&b,nbytes,i);  assert(b == ROTL64(a,i));
+
+      b = a; rrot1    (&b,nbytes,i);  assert(b == ROTR64(a,i));
+      b = a; rrot8    (&b,nbytes,i);  assert(b == ROTR64(a,i));
+      b = a; rrot32   (&b,nbytes,i);  assert(b == ROTR64(a,i));
+    }
+  }
+  // NOTE: without DEBUG, assert is the empty function defined at the top of this file, so PASS is printed regardless of the comparisons.
+  printf("PASS\n");
+  return true;
+}
+
+//-----------------------------------------------------------------------------
+// Checks that window1, window8 and the dispatching window() agree on random nbits-wide keys (nbits a multiple of 8).
+template < int nbits >
+bool test_window2 ( void )
+{
+  Rand r(83874);
+
+  struct keytype
+  {
+    uint8_t bytes[nbits/8];
+  };
+
+  int nbytes = nbits / 8;
+  int reps = 10000;
+
+  for(int j = 0; j < reps; j++)
+  {
+    if(j % (reps/10) == 0) printf(".");
+
+    keytype k;
+
+    r.rand_p(&k,nbytes);
+    // Every start bit, with window widths of 0 to 31 bits.
+    for(int start = 0; start < nbits; start++)
+    {
+      for(int count = 0; count < 32; count++)
+      {
+        uint32_t a = window1(&k,nbytes,start,count);
+        uint32_t b = window8(&k,nbytes,start,count);
+        uint32_t c = window(&k,nbytes,start,count);
+        // window1 is the reference; the other two must match it.
+        assert(a == b);
+        assert(a == c);
+      }
+    }
+  }
+
+  printf("PASS %d\n",nbits);
+
+  return true;
+}
+// Checks every window implementation against a rotate-and-mask reference on random 64-bit values, then runs the narrower-key checks.
+bool test_window ( void )
+{
+  Rand r(48402);
+  
+  int reps = 10000;
+
+  for(int j = 0; j < reps; j++)
+  {
+    if(j % (reps/10) == 0) printf(".");
+
+    int nbits   = 64;
+    int nbytes  = nbits / 8;
+
+    uint64_t x = r.rand_u64();
+
+    for(int start = 0; start < nbits; start++)
+    {
+      for(int count = 0; count < 32; count++)
+      {
+        uint32_t a = (uint32_t)ROTR64(x,start);
+        a &= ((uint32_t(1) << count)-1);  // unsigned mask: the old (1 << count)-1 overflowed a signed int at count 31
+        
+        uint32_t b = window1 (&x,nbytes,start,count);
+        uint32_t c = window8 (&x,nbytes,start,count);
+        uint32_t d = window32(&x,nbytes,start,count);
+        uint32_t e = window  (x,start,count);
+        // a is the reference value; every implementation must reproduce it.
+        assert(a == b);
+        assert(a == c);
+        assert(a == d);
+        assert(a == e);
+      }
+    }
+  }
+
+  printf("PASS 64\n");
+
+  test_window2<8>();
+  test_window2<16>();
+  test_window2<24>();
+  test_window2<32>();
+  test_window2<40>();
+  test_window2<48>();
+  test_window2<56>();
+  test_window2<64>();
+
+  return true;
+}
+
+//-----------------------------------------------------------------------------
diff --git a/Bitvec.h b/Bitvec.h
new file mode 100644
index 0000000..4d61979
--- /dev/null
+++ b/Bitvec.h
@@ -0,0 +1,245 @@
+#pragma once
+
+#include "Platform.h"
+
+#include <vector>
+
+//-----------------------------------------------------------------------------
+
+void     printbits   ( const void * blob, int len );
+void     printhex32  ( const void * blob, int len );
+void     printbytes  ( const void * blob, int len );
+void     printbytes2 ( const void * blob, int len );
+
+uint32_t popcount    ( uint32_t v );
+uint32_t parity      ( uint32_t v );
+
+uint32_t getbit      ( const void * blob, int len, uint32_t bit );
+uint32_t getbit_wrap ( const void * blob, int len, uint32_t bit );
+
+void     setbit      ( void * blob, int len, uint32_t bit );
+void     setbit      ( void * blob, int len, uint32_t bit, uint32_t val );
+
+void     clearbit    ( void * blob, int len, uint32_t bit );
+
+void     flipbit     ( void * blob, int len, uint32_t bit );
+
+int      countbits   ( uint32_t v );
+int      countbits   ( std::vector<uint32_t> & v );
+
+int      countbits   ( const void * blob, int len );
+
+void     invert      ( std::vector<uint32_t> & v );
+
+//----------
+// Generic form: views blob as sizeof(blob) raw bytes and forwards to the pointer/length getbit.
+template< typename T >
+inline uint32_t getbit ( T & blob, uint32_t bit )
+{
+  return getbit(&blob,sizeof(blob),bit);
+}
+// Specializations for 32/64-bit integers shift the value directly; the bit index is masked to the word width.
+template<> inline uint32_t getbit ( uint32_t & blob, uint32_t bit ) { return (blob >> (bit & 31)) & 1; }
+template<> inline uint32_t getbit ( uint64_t & blob, uint32_t bit ) { return (blob >> (bit & 63)) & 1; }
+
+//----------
+// Generic form: views blob as sizeof(blob) raw bytes and forwards to the pointer/length setbit.
+template< typename T >
+inline void setbit ( T & blob, uint32_t bit )
+{
+  return setbit(&blob,sizeof(blob),bit);
+}
+// Specializations for 32/64-bit integers OR in a single-bit mask; the bit index is masked to the word width.
+template<> inline void setbit ( uint32_t & blob, uint32_t bit ) { blob |= uint32_t(1) << (bit & 31); }
+template<> inline void setbit ( uint64_t & blob, uint32_t bit ) { blob |= uint64_t(1) << (bit & 63); }
+
+//----------
+// Generic form: views blob as sizeof(blob) raw bytes and forwards to the pointer/length flipbit.
+template< typename T >
+inline void flipbit ( T & blob, uint32_t bit )
+{
+  flipbit(&blob,sizeof(blob),bit);
+}
+// Specializations for 32/64-bit integers XOR in a single-bit mask; the bit index is masked to the word width.
+template<> inline void flipbit ( uint32_t & blob, uint32_t bit ) { bit &= 31; blob ^= (uint32_t(1) << bit); }
+template<> inline void flipbit ( uint64_t & blob, uint32_t bit ) { bit &= 63; blob ^= (uint64_t(1) << bit); }
+
+//-----------------------------------------------------------------------------
+// Left and right shift of blobs. The shift(N) versions work on chunks of N
+// bits at a time (faster)
+
+void lshift1  ( void * blob, int len, int c );
+void lshift8  ( void * blob, int len, int c );
+void lshift32 ( void * blob, int len, int c );
+
+void rshift1  ( void * blob, int len, int c );
+void rshift8  ( void * blob, int len, int c );
+void rshift32 ( void * blob, int len, int c );
+// Left-shifts a blob of len bytes by c bits, using the 32-bit routine when len is a multiple of 4.
+inline void lshift ( void * blob, int len, int c )
+{
+  const bool wholeWords = ((len & 3) == 0);
+
+  if(!wholeWords)
+  {
+    lshift8(blob,len,c);
+    return;
+  }
+  lshift32(blob,len,c);
+}
+// Right-shifts a blob of len bytes by c bits, using the 32-bit routine when len is a multiple of 4.
+inline void rshift ( void * blob, int len, int c )
+{
+  const bool wholeWords = ((len & 3) == 0);
+
+  if(!wholeWords)
+  {
+    rshift8(blob,len,c);
+    return;
+  }
+  rshift32(blob,len,c);
+}
+// Left-shifts any object in place by c bits; sizeof(T) picks the 32-bit or the byte-wise routine.
+template < typename T >
+inline void lshift ( T & blob, int c )
+{
+  const int nbytes = (int)sizeof(T);
+
+  if(nbytes & 3)
+  {
+    lshift8(&blob,nbytes,c);
+    return;
+  }
+  lshift32(&blob,nbytes,c);
+}
+// Right-shifts any object in place by c bits; sizeof(T) picks the 32-bit or the byte-wise routine.
+template < typename T >
+inline void rshift ( T & blob, int c )
+{
+  if((sizeof(T) & 3) == 0)
+  {
+    rshift32(&blob,sizeof(T),c);  // was lshift32: a copy/paste slip that shifted the wrong way
+  }
+  else
+  {
+    rshift8(&blob,sizeof(T),c);   // was lshift8
+  }
+}
+// 32/64-bit integers use the native shift operators; c is passed through without any range check.
+template<> inline void lshift ( uint32_t & blob, int c ) { blob <<= c; }
+template<> inline void lshift ( uint64_t & blob, int c ) { blob <<= c; }
+template<> inline void rshift ( uint32_t & blob, int c ) { blob >>= c; }
+template<> inline void rshift ( uint64_t & blob, int c ) { blob >>= c; }
+
+//-----------------------------------------------------------------------------
+// Left and right rotate of blobs. The rot(N) versions work on chunks of N
+// bits at a time (faster)
+
+void lrot1    ( void * blob, int len, int c );
+void lrot8    ( void * blob, int len, int c );
+void lrot32   ( void * blob, int len, int c );
+
+void rrot1    ( void * blob, int len, int c );
+void rrot8    ( void * blob, int len, int c );
+void rrot32   ( void * blob, int len, int c );
+// Rotates a blob of len bytes left by c bits, using the 32-bit routine when len is a multiple of 4.
+inline void lrot ( void * blob, int len, int c )
+{
+  const bool wholeWords = ((len & 3) == 0);
+
+  if(!wholeWords)
+  {
+    lrot8(blob,len,c);
+    return;
+  }
+  lrot32(blob,len,c);
+}
+// Rotates a blob of len bytes right by c bits, using the 32-bit routine when len is a multiple of 4.
+inline void rrot ( void * blob, int len, int c )
+{
+  const bool wholeWords = ((len & 3) == 0);
+
+  if(!wholeWords)
+  {
+    rrot8(blob,len,c);
+    return;
+  }
+  rrot32(blob,len,c);
+}
+// Rotates any object in place left by c bits; sizeof(T) picks the 32-bit or the byte-wise routine.
+template < typename T >
+inline void lrot ( T & blob, int c )
+{
+  const int nbytes = (int)sizeof(T);
+
+  if(nbytes & 3)
+  {
+    lrot8(&blob,nbytes,c);
+    return;
+  }
+  lrot32(&blob,nbytes,c);
+}
+// Rotates any object in place right by c bits; sizeof(T) picks the 32-bit or the byte-wise routine.
+template < typename T >
+inline void rrot ( T & blob, int c )
+{
+  const int nbytes = (int)sizeof(T);
+
+  if(nbytes & 3)
+  {
+    rrot8(&blob,nbytes,c);
+    return;
+  }
+  rrot32(&blob,nbytes,c);
+}
+// 32/64-bit integers use the ROTL/ROTR macros (not defined in this header; presumably from Platform.h - confirm).
+template<> inline void lrot ( uint32_t & blob, int c ) { blob = ROTL32(blob,c); }
+template<> inline void lrot ( uint64_t & blob, int c ) { blob = ROTL64(blob,c); }
+template<> inline void rrot ( uint32_t & blob, int c ) { blob = ROTR32(blob,c); }
+template<> inline void rrot ( uint64_t & blob, int c ) { blob = ROTR64(blob,c); }
+
+//-----------------------------------------------------------------------------
+// Bit-windowing functions - select some N-bit subset of the input blob
+
+uint32_t window1  ( void * blob, int len, int start, int count );
+uint32_t window8  ( void * blob, int len, int start, int count );
+uint32_t window32 ( void * blob, int len, int start, int count );
+// Extracts `count` bits starting at bit `start`, using the byte-wise reader unless len is a multiple of 4.
+inline uint32_t window ( void * blob, int len, int start, int count )
+{
+  const bool wholeWords = ((len & 3) == 0);
+
+  // Only one of the two readers is ever invoked.
+  const uint32_t bits = wholeWords
+                      ? window32(blob,len,start,count)
+                      : window8 (blob,len,start,count);
+
+  return bits;
+}
+// Typed wrapper: extracts `count` bits starting at bit `start` from any object, dispatching on sizeof(T).
+template < typename T >
+inline uint32_t window ( T & blob, int start, int count )
+{
+  const int nbytes = (int)sizeof(T);
+
+  // Sizes that are not a multiple of 4 bytes need the byte-wise reader.
+  if(nbytes & 3)
+  {
+    return window8(&blob,nbytes,start,count);
+  }
+  return window32(&blob,nbytes,start,count);
+}
+// 32-bit fast path: rotate the wanted bits down to bit 0, then keep the low `count` bits (count must be below 32).
+template<> 
+inline uint32_t window ( uint32_t & blob, int start, int count )
+{
+  return ROTR32(blob,start) & ((uint32_t(1) << count) - 1);  // unsigned mask: (1<<count)-1 overflowed a signed int at count 31
+}
+// 64-bit fast path: rotate the wanted bits down to bit 0, truncate to 32 bits, keep the low `count` bits (count must be below 32).
+template<> 
+inline uint32_t window ( uint64_t & blob, int start, int count )
+{
+  return (uint32_t)ROTR64(blob,start) & ((uint32_t(1) << count) - 1);  // unsigned mask: (1<<count)-1 overflowed a signed int at count 31
+}
+
+//-----------------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..3aaec87
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,43 @@
+cmake_minimum_required(VERSION 2.4) # must run before project() so the policy settings are in place first
+project(SMHasher)
+
+if(NOT CMAKE_BUILD_TYPE) # default to Release, but respect -DCMAKE_BUILD_TYPE=... from the user
+  set(CMAKE_BUILD_TYPE Release)
+endif(NOT CMAKE_BUILD_TYPE)
+add_library( # support library: every source file except main.cpp
+  SMHasherSupport
+  AvalancheTest.cpp
+  Bitslice.cpp
+  Bitvec.cpp
+  CityTest.cpp
+  City.cpp
+  crc.cpp
+  DifferentialTest.cpp
+  Hashes.cpp
+  KeysetTest.cpp
+  lookup3.cpp
+  md5.cpp
+  MurmurHash1.cpp
+  MurmurHash2.cpp
+  MurmurHash3.cpp
+  Platform.cpp
+  Random.cpp
+  sha1.cpp
+  SpeedTest.cpp
+  Spooky.cpp
+  SpookyTest.cpp
+  Stats.cpp
+  SuperFastHash.cpp
+  Types.cpp
+  PMurHash.c
+)
+# The SMHasher executable is built from main.cpp alone.
+add_executable(
+  SMHasher
+  main.cpp
+)
+# Everything else comes from the SMHasherSupport library.
+target_link_libraries(
+  SMHasher
+  SMHasherSupport
+)
diff --git a/City.cpp b/City.cpp
new file mode 100644
index 0000000..d7c33bc
--- /dev/null
+++ b/City.cpp
@@ -0,0 +1,465 @@
+// Copyright (c) 2011 Google, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+// CityHash, by Geoff Pike and Jyrki Alakuijala
+//
+// This file provides CityHash64() and related functions.
+//
+// It's probably possible to create even faster hash functions by
+// writing a program that systematically explores some of the space of
+// possible hash functions, by using SIMD instructions, or by
+// compromising on hash quality.
+
+#include "City.h"
+
+#include <algorithm>
+#include <string.h>  // for memcpy and memset
+
+using namespace std;
+
+static uint64 UNALIGNED_LOAD64(const char *p) {
+  uint64 word;  // filled with memcpy, so p may have any alignment
+  memcpy(&word, p, sizeof(uint64));
+  return word;  // host byte order; Fetch64 applies the endian fix-up
+}
+
+static uint32 UNALIGNED_LOAD32(const char *p) {
+  uint32 word;  // filled with memcpy, so p may have any alignment
+  memcpy(&word, p, sizeof(uint32));
+  return word;  // host byte order; Fetch32 applies the endian fix-up
+}
+
+#ifndef __BIG_ENDIAN__
+// Without __BIG_ENDIAN__ the loaded words are used exactly as read.
+#define uint32_in_expected_order(x) (x)
+#define uint64_in_expected_order(x) (x)
+
+#else
+// With __BIG_ENDIAN__ every loaded word is byte-swapped; pick the swap primitives per platform.
+#ifdef _MSC_VER
+#include <stdlib.h>
+#define bswap_32(x) _byteswap_ulong(x)
+#define bswap_64(x) _byteswap_uint64(x)
+
+#elif defined(__APPLE__)
+// Mac OS X / Darwin features
+#include <libkern/OSByteOrder.h>
+#define bswap_32(x) OSSwapInt32(x)
+#define bswap_64(x) OSSwapInt64(x)
+
+#else
+#include <byteswap.h>  // other platforms: bswap_32/bswap_64 are expected to come from this header
+#endif
+
+#define uint32_in_expected_order(x) (bswap_32(x))
+#define uint64_in_expected_order(x) (bswap_64(x))
+
+#endif  // __BIG_ENDIAN__
+
+#if !defined(LIKELY)  // keep any LIKELY the build already provides
+#if defined(__GNUC__) || defined(__INTEL_COMPILER)
+#define LIKELY(x) (__builtin_expect(!!(x), 1))  // !! normalizes x to 0/1 before hinting "expected true"
+#else
+#define LIKELY(x) (x)  // no hint available: plain pass-through
+#endif
+#endif
+
+static uint64 Fetch64(const char *p) {  // reads 8 bytes at p (any alignment), byte-swapped when __BIG_ENDIAN__ is defined
+  return uint64_in_expected_order(UNALIGNED_LOAD64(p));
+}
+
+static uint32 Fetch32(const char *p) {  // reads 4 bytes at p (any alignment), byte-swapped when __BIG_ENDIAN__ is defined
+  return uint32_in_expected_order(UNALIGNED_LOAD32(p));
+}
+
+// Some primes between 2^63 and 2^64 for various uses.
+static const uint64 k0 = 0xc3a5c85c97cb3127ULL;  // with k1: the fixed seed CityHash128 uses for inputs under 8 bytes
+static const uint64 k1 = 0xb492b66fbe98f273ULL;  // multiplier in the 64-byte main loops
+static const uint64 k2 = 0x9ae16a3b2f90404fULL;  // also the hash of empty input and the seed0 used by CityHash64WithSeed
+static const uint64 k3 = 0xc949d7c7509e6557ULL;
+
+// Rotates val right by shift bits. A shift of zero is answered up front,
+// because the complementary left shift would then be a shift by 64.
+static uint64 Rotate(uint64 val, int shift) {
+  if (shift == 0) return val;  // shifting by 64 yields an undefined result
+  return (val >> shift) | (val << (64 - shift));
+}
+
+// Equivalent to Rotate(), but requires the second arg to be non-zero.
+// On x86-64, and probably others, it's possible for this to compile
+// to a single instruction if both args are already in registers.
+static uint64 RotateByAtLeast1(uint64 val, int shift) {
+  return (val >> shift) | (val << (64 - shift));  // shift == 0 would make the left shift a shift by 64
+}
+
+static uint64 ShiftMix(uint64 val) {  // xors the top 17 bits of val into its low bits
+  return val ^ (val >> 47);
+}
+
+static uint64 HashLen16(uint64 u, uint64 v) {  // 64-bit hash of the pair (u, v): u is the low half handed to Hash128to64
+  return Hash128to64(uint128(u, v));
+}
+
+static uint64 HashLen0to16(const char *s, size_t len) {  // callers in this file only pass len <= 16
+  if (len > 8) {  // 9..16 bytes: first and last 8 bytes, which may overlap
+    uint64 a = Fetch64(s);
+    uint64 b = Fetch64(s + len - 8);
+    return HashLen16(a, RotateByAtLeast1(b + len, len)) ^ b;  // len is non-zero here, as RotateByAtLeast1 requires
+  }
+  if (len >= 4) {  // 4..8 bytes: first and last 4 bytes, which may overlap
+    uint64 a = Fetch32(s);
+    return HashLen16(len + (a << 3), Fetch32(s + len - 4));
+  }
+  if (len > 0) {  // 1..3 bytes: sample the first, middle and last byte
+    uint8 a = s[0];
+    uint8 b = s[len >> 1];
+    uint8 c = s[len - 1];
+    uint32 y = static_cast<uint32>(a) + (static_cast<uint32>(b) << 8);
+    uint32 z = len + (static_cast<uint32>(c) << 2);
+    return ShiftMix(y * k2 ^ z * k3) * k2;
+  }
+  return k2;  // empty input hashes to the constant k2
+}
+
+// This probably works well for 16-byte strings as well, but it may be overkill
+// in that case.
+static uint64 HashLen17to32(const char *s, size_t len) {  // CityHash64 routes 17..32-byte inputs here
+  uint64 a = Fetch64(s) * k1;
+  uint64 b = Fetch64(s + 8);
+  uint64 c = Fetch64(s + len - 8) * k2;  // last 8 bytes
+  uint64 d = Fetch64(s + len - 16) * k0;  // the 8 bytes before those; overlaps the head reads when len < 32
+  return HashLen16(Rotate(a - b, 43) + Rotate(c, 30) + d,
+                   a + Rotate(b ^ k3, 20) - c + len);
+}
+
+// Return a 16-byte hash for 48 bytes.  Quick and dirty.
+// Callers do best to use "random-looking" values for a and b.
+static pair<uint64, uint64> WeakHashLen32WithSeeds(
+    uint64 w, uint64 x, uint64 y, uint64 z, uint64 a, uint64 b) {  // the 48 bytes are the four data words w..z plus the two seeds a, b
+  a += w;
+  b = Rotate(b + a + z, 21);
+  uint64 c = a;  // remember a + w before x and y are folded in
+  a += x;
+  a += y;
+  b += Rotate(a, 44);
+  return make_pair(a + z, b + c);
+}
+
+// Return a 16-byte hash for s[0] ... s[31], a, and b.  Quick and dirty.
+static pair<uint64, uint64> WeakHashLen32WithSeeds(
+    const char* s, uint64 a, uint64 b) {  // loads four consecutive 64-bit words from s and defers to the overload above
+  return WeakHashLen32WithSeeds(Fetch64(s),
+                                Fetch64(s + 8),
+                                Fetch64(s + 16),
+                                Fetch64(s + 24),
+                                a,
+                                b);
+}
+
+// Return an 8-byte hash for 33 to 64 bytes.
+static uint64 HashLen33to64(const char *s, size_t len) {
+  uint64 z = Fetch64(s + 24);
+  uint64 a = Fetch64(s) + (len + Fetch64(s + len - 16)) * k0;
+  uint64 b = Rotate(a + z, 52);
+  uint64 c = Rotate(a, 37);
+  a += Fetch64(s + 8);
+  c += Rotate(a, 7);
+  a += Fetch64(s + 16);
+  uint64 vf = a + z;  // (vf, vs): pair derived mostly from the first 32 bytes
+  uint64 vs = b + Rotate(a, 31) + c;
+  a = Fetch64(s + 16) + Fetch64(s + len - 32);  // same recipe again, now over the last 32 bytes
+  z = Fetch64(s + len - 8);
+  b = Rotate(a + z, 52);
+  c = Rotate(a, 37);
+  a += Fetch64(s + len - 24);
+  c += Rotate(a, 7);
+  a += Fetch64(s + len - 16);
+  uint64 wf = a + z;  // (wf, ws): pair derived from the tail
+  uint64 ws = b + Rotate(a, 31) + c;
+  uint64 r = ShiftMix((vf + ws) * k2 + (wf + vs) * k0);  // cross-combine the two pairs
+  return ShiftMix(r * k0 + vs) * k2;
+}
+
+uint64 CityHash64(const char *s, size_t len) {  // 64-bit hash of s[0..len); short inputs go to length-specific helpers
+  if (len <= 32) {
+    if (len <= 16) {
+      return HashLen0to16(s, len);
+    } else {
+      return HashLen17to32(s, len);
+    }
+  } else if (len <= 64) {
+    return HashLen33to64(s, len);
+  }
+
+  // For strings over 64 bytes we hash the end first, and then as we
+  // loop we keep 56 bytes of state: v, w, x, y, and z.
+  uint64 x = Fetch64(s + len - 40);
+  uint64 y = Fetch64(s + len - 16) + Fetch64(s + len - 56);
+  uint64 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24));
+  pair<uint64, uint64> v = WeakHashLen32WithSeeds(s + len - 64, len, z);
+  pair<uint64, uint64> w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x);
+  x = x * k1 + Fetch64(s);
+
+  // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
+  len = (len - 1) & ~static_cast<size_t>(63);  // largest multiple of 64 strictly below len; at least 64 because len > 64 here
+  do {
+    x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
+    y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
+    x ^= w.second;
+    y += v.first + Fetch64(s + 40);
+    z = Rotate(z + w.first, 33) * k1;
+    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
+    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
+    std::swap(z, x);
+    s += 64;  // advance to the next 64-byte chunk
+    len -= 64;
+  } while (len != 0);
+  return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z,
+                   HashLen16(v.second, w.second) + x);
+}
+
+uint64 CityHash64WithSeed(const char *s, size_t len, uint64 seed) {  // single-seed form: seed0 is fixed to k2, seed becomes seed1
+  return CityHash64WithSeeds(s, len, k2, seed);
+}
+
+uint64 CityHash64WithSeeds(const char *s, size_t len,
+                           uint64 seed0, uint64 seed1) {  // the seeds are mixed in after the unseeded hash is computed
+  return HashLen16(CityHash64(s, len) - seed0, seed1);
+}
+
+// A subroutine for CityHash128().  Returns a decent 128-bit hash for strings
+// of any length representable in signed long.  Based on City and Murmur.
+static uint128 CityMurmur(const char *s, size_t len, uint128 seed) {
+  uint64 a = Uint128Low64(seed);
+  uint64 b = Uint128High64(seed);
+  uint64 c = 0;
+  uint64 d = 0;
+  signed long l = len - 16;  // signed on purpose: the branch below relies on l <= 0 for len <= 16
+  if (l <= 0) {  // len <= 16
+    a = ShiftMix(a * k1) * k1;
+    c = b * k1 + HashLen0to16(s, len);
+    d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c));  // only reads 8 bytes from s when at least 8 are available
+  } else {  // len > 16
+    c = HashLen16(Fetch64(s + len - 8) + k1, a);
+    d = HashLen16(b + len, c + Fetch64(s + len - 16));
+    a += d;
+    do {  // consume 16 bytes per iteration
+      a ^= ShiftMix(Fetch64(s) * k1) * k1;
+      a *= k1;
+      b ^= a;
+      c ^= ShiftMix(Fetch64(s + 8) * k1) * k1;
+      c *= k1;
+      d ^= c;
+      s += 16;
+      l -= 16;
+    } while (l > 0);
+  }
+  a = HashLen16(a, c);
+  b = HashLen16(d, b);
+  return uint128(a ^ b, HashLen16(b, a));
+}
+
+uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) {  // 128-bit hash of s[0..len) combined with seed
+  if (len < 128) {
+    return CityMurmur(s, len, seed);
+  }
+
+  // We expect len >= 128 to be the common case.  Keep 56 bytes of state:
+  // v, w, x, y, and z.
+  pair<uint64, uint64> v, w;
+  uint64 x = Uint128Low64(seed);
+  uint64 y = Uint128High64(seed);
+  uint64 z = len * k1;
+  v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s);
+  v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8);
+  w.first = Rotate(y + z, 35) * k1 + x;
+  w.second = Rotate(x + Fetch64(s + 88), 53) * k1;
+
+  // This is the same inner loop as CityHash64(), manually unrolled.
+  do {  // each pass consumes two 64-byte chunks (128 bytes)
+    x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
+    y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
+    x ^= w.second;
+    y += v.first + Fetch64(s + 40);
+    z = Rotate(z + w.first, 33) * k1;
+    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
+    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
+    std::swap(z, x);
+    s += 64;
+    x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
+    y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
+    x ^= w.second;
+    y += v.first + Fetch64(s + 40);
+    z = Rotate(z + w.first, 33) * k1;
+    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
+    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
+    std::swap(z, x);
+    s += 64;
+    len -= 128;
+  } while (LIKELY(len >= 128));
+  x += Rotate(v.first + z, 49) * k0;
+  z += Rotate(w.first, 37) * k0;
+  // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
+  for (size_t tail_done = 0; tail_done < len; ) {
+    tail_done += 32;  // chunks are taken backwards from the end; the last one may start before s, inside data already hashed
+    y = Rotate(x + y, 42) * k0 + v.second;
+    w.first += Fetch64(s + len - tail_done + 16);
+    x = x * k0 + w.first;
+    z += w.second + Fetch64(s + len - tail_done);
+    w.second += v.first;
+    v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second);
+  }
+  // At this point our 56 bytes of state should contain more than
+  // enough information for a strong 128-bit hash.  We use two
+  // different 56-byte-to-8-byte hashes to get a 16-byte final result.
+  x = HashLen16(x, v.first);
+  y = HashLen16(y + z, w.first);
+  return uint128(HashLen16(x + v.second, w.second) + y,
+                 HashLen16(x + w.second, y + v.second));
+}
+
+uint128 CityHash128(const char *s, size_t len) {  // unseeded entry point: derives a seed from the input, then defers to CityHash128WithSeed
+  if (len >= 16) {  // the first 16 bytes become the seed; only the remainder is hashed
+    return CityHash128WithSeed(s + 16,
+                               len - 16,
+                               uint128(Fetch64(s) ^ k3,
+                                       Fetch64(s + 8)));
+  } else if (len >= 8) {  // 8..15 bytes: all input goes into the seed (first and last 8 bytes), hashing an empty string
+    return CityHash128WithSeed(NULL,
+                               0,
+                               uint128(Fetch64(s) ^ (len * k0),
+                                       Fetch64(s + len - 8) ^ k1));
+  } else {  // under 8 bytes: fixed seed (k0, k1)
+    return CityHash128WithSeed(s, len, uint128(k0, k1));
+  }
+}
+
+#if defined(__SSE4_2__) && defined(__x86_64__)
+#include <nmmintrin.h>
+
+// Requires len >= 240.
+static void CityHashCrc256Long(const char *s, size_t len,
+                               uint32 seed, uint64 *result) {  // writes result[0..3]; result[0] and result[1] also serve as scratch
+  uint64 a = Fetch64(s + 56) + k0;
+  uint64 b = Fetch64(s + 96) + k0;
+  uint64 c = result[0] = HashLen16(b, len);
+  uint64 d = result[1] = Fetch64(s + 120) * k0 + len;
+  uint64 e = Fetch64(s + 184) + seed;
+  uint64 f = seed;
+  uint64 g = 0;
+  uint64 h = 0;
+  uint64 i = 0;
+  uint64 j = 0;
+  uint64 t = c + d;
+
+  // 240 bytes of input per iter.
+  size_t iters = len / 240;  // at least 1, given the len >= 240 precondition
+  len -= iters * 240;  // len now counts the bytes left over after the main loop
+  do {
+#define CHUNK(multiplier, z)                                    \
+    {                                                           \
+      uint64 old_a = a;                                         \
+      a = Rotate(b, 41 ^ z) * multiplier + Fetch64(s);          \
+      b = Rotate(c, 27 ^ z) * multiplier + Fetch64(s + 8);      \
+      c = Rotate(d, 41 ^ z) * multiplier + Fetch64(s + 16);     \
+      d = Rotate(e, 33 ^ z) * multiplier + Fetch64(s + 24);     \
+      e = Rotate(t, 25 ^ z) * multiplier + Fetch64(s + 32);     \
+      t = old_a;                                                \
+    }                                                           \
+    f = _mm_crc32_u64(f, a);                                    \
+    g = _mm_crc32_u64(g, b);                                    \
+    h = _mm_crc32_u64(h, c);                                    \
+    i = _mm_crc32_u64(i, d);                                    \
+    j = _mm_crc32_u64(j, e);                                    \
+    s += 40
+  // Each CHUNK consumes 40 bytes and advances s; six of them make one 240-byte iteration.
+    CHUNK(1, 1); CHUNK(k0, 0);
+    CHUNK(1, 1); CHUNK(k0, 0);
+    CHUNK(1, 1); CHUNK(k0, 0);
+  } while (--iters > 0);
+
+  while (len >= 40) {  // whole 40-byte chunks of the remainder
+    CHUNK(k0, 0);
+    len -= 40;
+  }
+  if (len > 0) {  // 1..39 bytes left: back up so the final chunk ends exactly at the end of the input
+    s = s + len - 40;
+    CHUNK(k0, 0);
+  }
+  j += i << 32;
+  a = HashLen16(a, j);
+  h += g << 32;
+  b += h;
+  c = HashLen16(c, f) + i;
+  d = HashLen16(d, e + result[0]);
+  j += e;
+  i += HashLen16(h, t);
+  e = HashLen16(a, d) + j;
+  f = HashLen16(b, c) + a;
+  g = HashLen16(j, i) + c;
+  result[0] = e + f + g + h;
+  a = ShiftMix((a + g) * k0) * k0 + b;
+  result[1] += a + result[0];  // adds to the value stored in result[1] at the top of the function
+  a = ShiftMix(a * k0) * k0 + c;
+  result[2] = a + result[1];
+  a = ShiftMix((a + e) * k0) * k0;
+  result[3] = a + result[2];
+}
+
+// Requires len < 240.
+static void CityHashCrc256Short(const char *s, size_t len, uint64 *result) {
+  char buf[240];
+  memcpy(buf, s, len);
+  memset(buf + len, 0, 240 - len);  // zero-pad the copy up to exactly 240 bytes
+  CityHashCrc256Long(buf, 240, ~static_cast<uint32>(len), result);  // the original length enters only through the seed (~len)
+}
+
+void CityHashCrc256(const char *s, size_t len, uint64 *result) {  // fills result[0..3]
+  if (LIKELY(len >= 240)) {
+    CityHashCrc256Long(s, len, 0, result);  // long inputs are hashed in place with seed 0
+  } else {
+    CityHashCrc256Short(s, len, result);  // short inputs are padded to 240 bytes first
+  }
+}
+
+uint128 CityHashCrc128WithSeed(const char *s, size_t len, uint128 seed) {
+  if (len <= 900) {  // up to 900 bytes: use the plain (non-CRC) 128-bit hash
+    return CityHash128WithSeed(s, len, seed);
+  } else {
+    uint64 result[4];
+    CityHashCrc256(s, len, result);
+    uint64 u = Uint128High64(seed) + result[0];  // fold the seed into the 256-bit result
+    uint64 v = Uint128Low64(seed) + result[1];
+    return uint128(HashLen16(u, v + result[2]),
+                   HashLen16(Rotate(v, 32), u * k0 + result[3]));
+  }
+}
+
+uint128 CityHashCrc128(const char *s, size_t len) {
+  if (len <= 900) {  // up to 900 bytes: use the plain (non-CRC) 128-bit hash
+    return CityHash128(s, len);
+  } else {
+    uint64 result[4];
+    CityHashCrc256(s, len, result);
+    return uint128(result[2], result[3]);  // keep only the last two words of the 256-bit result
+  }
+}
+
+#endif
diff --git a/City.h b/City.h
new file mode 100644
index 0000000..c12c1bb
--- /dev/null
+++ b/City.h
@@ -0,0 +1,107 @@
+// Copyright (c) 2011 Google, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+// CityHash, by Geoff Pike and Jyrki Alakuijala
+//
+// This file provides a few functions for hashing strings. On x86-64
+// hardware in 2011, CityHash64() is faster than other high-quality
+// hash functions, such as Murmur.  This is largely due to higher
+// instruction-level parallelism.  CityHash64() and CityHash128() also perform
+// well on hash-quality tests.
+//
+// CityHash128() is optimized for relatively long strings and returns
+// a 128-bit hash.  For strings more than about 2000 bytes it can be
+// faster than CityHash64().
+//
+// Functions in the CityHash family are not suitable for cryptography.
+//
+// WARNING: This code has not been tested on big-endian platforms!
+// It is known to work well on little-endian platforms that have a small penalty
+// for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.
+//
+// By the way, for some hash functions, given strings a and b, the hash
+// of a+b is easily derived from the hashes of a and b.  This property
+// doesn't hold for any hash functions in this file.
+
+#ifndef CITY_HASH_H_
+#define CITY_HASH_H_
+
+#include "Platform.h"
+#include <stdlib.h>  // for size_t.
+//#include <stdint.h>
+#include <utility>
+
+typedef uint8_t uint8;
+typedef uint32_t uint32;
+typedef uint64_t uint64;
+typedef std::pair<uint64, uint64> uint128;  // 128-bit value stored as a (low, high) pair of 64-bit halves
+
+inline uint64 Uint128Low64(const uint128& x) { return x.first; }  // low 64 bits are kept in .first
+inline uint64 Uint128High64(const uint128& x) { return x.second; }  // high 64 bits are kept in .second
+
+// Hash function for a byte array.
+uint64 CityHash64(const char *buf, size_t len);
+
+// Hash function for a byte array.  For convenience, a 64-bit seed is also
+// hashed into the result.
+uint64 CityHash64WithSeed(const char *buf, size_t len, uint64 seed);
+
+// Hash function for a byte array.  For convenience, two seeds are also
+// hashed into the result.
+uint64 CityHash64WithSeeds(const char *buf, size_t len,
+                           uint64 seed0, uint64 seed1);
+
+// Hash function for a byte array.
+uint128 CityHash128(const char *s, size_t len);
+
+// Hash function for a byte array.  For convenience, a 128-bit seed is also
+// hashed into the result.
+uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed);
+
+// Hash 128 input bits down to 64 bits of output.
+// This is intended to be a reasonably good hash function.
+inline uint64 Hash128to64(const uint128& x) {
+  // Murmur-inspired hashing.
+  const uint64 kMul = 0x9ddfea08eb382d69ULL;
+  uint64 a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;  // round 1: both halves combined
+  a ^= (a >> 47);
+  uint64 b = (Uint128High64(x) ^ a) * kMul;  // round 2: the high half is mixed in a second time
+  b ^= (b >> 47);
+  b *= kMul;
+  return b;
+}
+
+// Conditionally include declarations for versions of City that require SSE4.2
+// instructions to be available.
+#if defined(__SSE4_2__) && defined(__x86_64__)
+
+// Hash function for a byte array.
+uint128 CityHashCrc128(const char *s, size_t len);
+
+// Hash function for a byte array.  For convenience, a 128-bit seed is also
+// hashed into the result.
+uint128 CityHashCrc128WithSeed(const char *s, size_t len, uint128 seed);
+
+// Hash function for a byte array.  Sets result[0] ... result[3].
+void CityHashCrc256(const char *s, size_t len, uint64 *result);
+
+#endif  // __SSE4_2__
+
+#endif  // CITY_HASH_H_
diff --git a/CityTest.cpp b/CityTest.cpp
new file mode 100644
index 0000000..4190cc8
--- /dev/null
+++ b/CityTest.cpp
@@ -0,0 +1,15 @@
+#include "City.h"
+
+void CityHash64_test ( const void * key, int len, uint32_t seed, void * out )
+{  // adapter: writes the seeded 64-bit CityHash of the key through out, which must have room for a uint64
+  *(uint64*)out = CityHash64WithSeed((const char *)key,len,seed);
+}
+
+void CityHash128_test ( const void * key, int len, uint32_t seed, void * out )
+{
+  // The 32-bit seed becomes the low half of the 128-bit seed pair; the high
+  // half stays zero. The 128-bit result is stored through out.
+  const uint128 seed128(seed,0);
+
+  *(uint128*)out = CityHash128WithSeed((const char*)key,len,seed128);
+}
diff --git a/DifferentialTest.cpp b/DifferentialTest.cpp
new file mode 100644
index 0000000..d9067c9
--- /dev/null
+++ b/DifferentialTest.cpp
@@ -0,0 +1,3 @@
+#include "DifferentialTest.h"
+
+//----------------------------------------------------------------------------
diff --git a/DifferentialTest.h b/DifferentialTest.h
new file mode 100644
index 0000000..824d72e
--- /dev/null
+++ b/DifferentialTest.h
@@ -0,0 +1,281 @@
+//-----------------------------------------------------------------------------
+// Differential collision & distribution tests - generate a bunch of random keys,
+// see what happens to the hash value when we flip a few bits of the key.
+
+#pragma once
+
+#include "Types.h"
+#include "Stats.h"      // for chooseUpToK
+#include "KeysetTest.h" // for SparseKeygenRecurse
+#include "Random.h"
+
+#include <vector>
+#include <algorithm>
+#include <stdio.h>
+
+//-----------------------------------------------------------------------------
+// Sort through the differentials, ignoring collisions that only occurred once
+// (these could be false positives). Any differential that collided two or
+// more times fails the differential test.
+
+template < class keytype >
+bool ProcessDifferentials ( std::vector<keytype> & diffs, int reps, bool dumpCollisions )
+{
+  // diffs          - one entry per collision found by the caller; sorted in
+  //                  place here so that equal differentials become adjacent
+  // reps           - divisor used to report each repeat count as a percentage
+  // dumpCollisions - print every repeated differential together with its rate
+  //
+  // Returns true if no differential occurs more than once, false otherwise.
+  //
+  // keytype needs operator< (for the sort) and operator== (to group repeats).
+
+  std::sort(diffs.begin(), diffs.end());
+
+  const size_t total = diffs.size();
+
+  int ignore = 0;
+
+  bool result = true;
+
+  // Walk the sorted list one run of identical differentials at a time. Every
+  // run goes through the same check, so a repeated differential that happens
+  // to sort last fails the test exactly like any other.
+
+  for(size_t first = 0; first < total; )
+  {
+    keytype kp = diffs[first];
+
+    size_t next = first + 1;
+
+    while((next < total) && (diffs[next] == kp))
+    {
+      next++;
+    }
+
+    const int count = (int)(next - first);
+
+    if(count > 1)
+    {
+      // Seen more than once - too often to write off as a random collision.
+
+      result = false;
+
+      double pct = 100 * (double(count) / double(reps));
+
+      if(dumpCollisions)
+      {
+        printbits((unsigned char*)&kp,sizeof(kp));
+        printf(" - %4.2f%%\n", pct );
+      }
+    }
+    else 
+    {
+      // A single occurrence could be a false positive, so only tally it.
+
+      ignore++;
+    }
+
+    first = next;
+  }
+
+  printf("%d total collisions, of which %d single collisions were ignored",(int)diffs.size(),ignore);
+
+  if(result == false)
+  {
+    printf(" !!!!! ");
+  }
+
+  printf("\n");
+  printf("\n");
+
+  return result;
+}
+
+//-----------------------------------------------------------------------------
+// Check all possible keybits-choose-N differentials for collisions, report
+// ones that occur significantly more often than expected.
+
+// Random collisions can happen with probability 1 in 2^32 - if we do more than
+// 2^32 tests, we'll probably see some spurious random collisions, so don't report
+// them.
+
+template < typename keytype, typename hashtype >
+void DiffTestRecurse ( pfHash hash, keytype & k1, keytype & k2, hashtype & h1, hashtype & h2, int start, int bitsleft, std::vector<keytype> & diffs )
+{  // k2 is k1 with the bits chosen so far flipped; h1 is only compared against, h2 is scratch for each hash of k2
+  const int bits = sizeof(keytype)*8;
+
+  for(int i = start; i < bits; i++)  // only bits >= start, so each combination of bits is visited once
+  {
+    flipbit(&k2,sizeof(k2),i);
+    bitsleft--;
+
+    hash(&k2,sizeof(k2),0,&h2);
+
+    if(h1 == h2)
+    {
+      diffs.push_back(k1 ^ k2);  // record the differential (the set of flipped bits) that collided
+    }
+
+    if(bitsleft)  // flip budget not used up yet: try adding further bits above i
+    {
+      DiffTestRecurse(hash,k1,k2,h1,h2,i+1,bitsleft,diffs);
+    }
+
+    flipbit(&k2,sizeof(k2),i);  // undo this flip before moving on to the next bit
+    bitsleft++;
+  }
+}
+
+//----------
+
+template < typename keytype, typename hashtype >
+bool DiffTest ( pfHash hash, int diffbits, int reps, bool dumpCollisions )
+{
+  // Hashes reps random keys and lets DiffTestRecurse try the bit-flip
+  // differentials of each one (diffbits is forwarded as the flip budget).
+  // Returns the verdict of ProcessDifferentials on the collisions found.
+  const int keybits = sizeof(keytype) * 8;
+  const int hashbits = sizeof(hashtype) * 8;
+
+  double diffcount = chooseUpToK(keybits,diffbits);
+  double testcount = (diffcount * double(reps));
+  double expected  = testcount / pow(2.0,double(hashbits));
+
+  // Progress-dot interval, clamped to 1 so reps < 10 never computes i % 0.
+  const int dotStep = (reps >= 10) ? (reps / 10) : 1;
+
+  Rand r(100);
+  std::vector<keytype> diffs;
+
+  keytype k1,k2;
+  hashtype h1,h2;
+
+  printf("Testing %0.f up-to-%d-bit differentials in %d-bit keys -> %d bit hashes.\n",diffcount,diffbits,keybits,hashbits);
+  printf("%d reps, %0.f total tests, expecting %2.2f random collisions",reps,testcount,expected);
+
+  for(int i = 0; i < reps; i++)
+  {
+    if(i % dotStep == 0) printf(".");
+
+    r.rand_p(&k1,sizeof(keytype));
+    k2 = k1;
+
+    hash(&k1,sizeof(k1),0,(uint32_t*)&h1);
+    DiffTestRecurse<keytype,hashtype>(hash,k1,k2,h1,h2,0,diffbits,diffs);
+  }
+  printf("\n");
+
+  return ProcessDifferentials(diffs,reps,dumpCollisions);
+}
+
+//-----------------------------------------------------------------------------
+// Differential distribution test - for each N-bit input differential, generate
+// a large set of differential key pairs, hash them, and test the output 
+// differentials using our distribution test code.
+
+// This is a very hard test to pass - even if the hash values are well-distributed,
+// the differences between hash values may not be. It's also not entirely relevant
+// for testing hash functions, but it's still interesting.
+
+// This test is a _lot_ of work, as it's essentially a full keyset test for
+// each of a potentially huge number of input differentials. To speed things
+// along, we do only a few distribution tests per keyset instead of the full
+// grid.
+
+// #TODO - put diagram drawing back on
+
+template < typename keytype, typename hashtype >
+void DiffDistTest ( pfHash hash, const int diffbits, int trials, double & worst, double & avg )
+{  // outputs: worst = largest and avg = mean of the per-differential values reported by TestDistributionFast
+  std::vector<keytype>  keys(trials);
+  std::vector<hashtype> A(trials),B(trials);  // A: hashes of the base keys; B: scratch for the hash differences
+
+  for(int i = 0; i < trials; i++)
+  {
+    rand_p(&keys[i],sizeof(keytype));
+
+    hash(&keys[i],sizeof(keytype),0,(uint32_t*)&A[i]);
+  }
+
+  //----------
+
+  std::vector<keytype> diffs;
+
+  keytype temp(0);
+
+  SparseKeygenRecurse<keytype>(0,diffbits,true,temp,diffs);  // fills diffs with the input differentials to test (helper from KeysetTest.h)
+
+  //----------
+
+  worst = 0;
+  avg = 0;
+
+  hashtype h2;
+
+  for(size_t j = 0; j < diffs.size(); j++)
+  {
+    keytype & d = diffs[j];
+
+    for(int i = 0; i < trials; i++)
+    {
+      keytype k2 = keys[i] ^ d;
+
+      hash(&k2,sizeof(k2),0,&h2);
+
+      B[i] = A[i] ^ h2;  // output differential for this key pair
+    }
+
+    double dworst,davg;
+
+    TestDistributionFast(B,dworst,davg);
+
+    avg += davg;
+    worst = (dworst > worst) ? dworst : worst;
+  }
+
+  avg /= double(diffs.size());  // NOTE(review): divides by zero if no differentials were generated
+}
+
+//-----------------------------------------------------------------------------
+// Simpler differential-distribution test - for all 1-bit differentials,
+// generate random key pairs and run full distribution/collision tests on the
+// hash differentials
+
+template < typename keytype, typename hashtype >
+bool DiffDistTest2 ( pfHash hash  )
+{
+  // For each key bit in turn: draw keycount random keys, hash each key with
+  // and without that bit flipped, and hand the xor of the two hashes to
+  // TestHashList. The return value is the AND of all per-bit verdicts.
+  const int bitCount = sizeof(keytype) * 8;
+  const int keycount = 256*256*32;
+
+  Rand rng(857374);
+  keytype key;
+  hashtype before,after;
+  std::vector<hashtype> deltas(keycount);
+
+  bool allPassed = true;
+
+  for(int bit = 0; bit < bitCount; ++bit)
+  {
+    printf("Testing bit %d\n",bit);
+
+    for(int n = 0; n < keycount; ++n)
+    {
+      rng.rand_p(&key,sizeof(keytype));
+      hash(&key,sizeof(keytype),0,&before);
+      flipbit(&key,sizeof(keytype),bit);
+      hash(&key,sizeof(keytype),0,&after);
+      deltas[n] = before ^ after;
+    }
+
+    const bool bitPassed = TestHashList<hashtype>(deltas,true,true,true);
+    allPassed = allPassed && bitPassed;
+    printf("\n");
+  }
+  return allPassed;
+}
+
+//----------------------------------------------------------------------------
diff --git a/Hashes.cpp b/Hashes.cpp
new file mode 100644
index 0000000..36a6c96
--- /dev/null
+++ b/Hashes.cpp
@@ -0,0 +1,155 @@
+#include "Hashes.h"
+
+#include "Random.h"
+
+
+#include <stdlib.h>
+//#include <stdint.h>
+#include <assert.h>
+//#include <emmintrin.h>
+//#include <xmmintrin.h>
+
+//----------------------------------------------------------------------------
+// fake / bad hashes
+
+void BadHash ( const void * key, int len, uint32_t seed, void * out )
+{
+  // Deliberately weak mixer: xor-shift the running state right by 3 and
+  // left by 5, then xor in the next key byte. Result is stored to *out.
+  const uint8_t * bytes = (const uint8_t*)key;
+  uint32_t state = seed;
+
+  for(int pos = 0; pos < len; ++pos)
+  {
+    state ^= (state >> 3);
+    state ^= (state << 5);
+    state ^= bytes[pos];
+  }
+  *(uint32_t*)out = state;
+}
+
+void sumhash ( const void * key, int len, uint32_t seed, void * out )
+{
+  // Not a real hash: the seed plus the plain byte sum of the key, with the
+  // usual 32-bit wrap-around. The total is stored to *out.
+  const uint8_t * bytes = (const uint8_t*)key;
+  uint32_t total = seed;
+
+  for(int pos = 0; pos < len; ++pos)
+  {
+    total += bytes[pos];
+  }
+  *(uint32_t*)out = total;
+}
+
+void sumhash32 ( const void * key, int len, uint32_t seed, void * out )
+{
+  // Adds whole 32-bit words instead of bytes: len/4 words are read through
+  // a uint32_t pointer and any trailing 1-3 bytes are ignored.
+  const uint32_t * words = (const uint32_t*)key;
+  const int wordCount = len/4;
+  uint32_t total = seed;
+  for(int pos = 0; pos < wordCount; ++pos)
+  {
+    total += words[pos];
+  }
+  *(uint32_t*)out = total;
+}
+
+void DoNothingHash ( const void *, int, uint32_t, void * )
+{  // intentionally empty: ignores every argument and never writes to the output buffer
+}
+
+//-----------------------------------------------------------------------------
+// One-byte-at-a-time hash based on Murmur's mix
+
+uint32_t MurmurOAAT ( const void * key, int len, uint32_t seed )
+{
+  // Per byte: xor the byte into the state, multiply by the mix constant,
+  // then fold the high bits down with a 15-bit right shift.
+  const uint32_t multiplier = 0x5bd1e995;
+  const uint8_t * bytes = (const uint8_t*)key;
+  uint32_t state = seed;
+
+  for(int pos = 0; pos < len; ++pos)
+  {
+    state = (state ^ bytes[pos]) * multiplier;
+    state ^= (state >> 15);
+  }
+  return state;
+}
+
+void MurmurOAAT_test ( const void * key, int len, uint32_t seed, void * out )
+{ // Adapter with the (key,len,seed,out) signature: stores the 32-bit result through out.
+  *(uint32_t*)out = MurmurOAAT(key,len,seed);
+}
+
+//----------------------------------------------------------------------------
+
+void FNV ( const void * key, int len, uint32_t seed, void * out )
+{
+  // Xor-then-multiply loop (byte xored in, state multiplied by 16777619).
+  // The seed is xored with 2166136261 before any byte is processed.
+  const uint8_t * bytes = (const uint8_t*)key;
+
+  unsigned int state = seed;
+  state ^= BIG_CONSTANT(2166136261);
+
+  for(int pos = 0; pos < len; ++pos)
+  {
+    state = (state ^ bytes[pos]) * 16777619;
+  }
+
+  *(uint32_t*)out = state;
+}
+
+//-----------------------------------------------------------------------------
+
+uint32_t x17 ( const void * key, int len, uint32_t h ) 
+{
+  // h = 17*h + (byte - ' ') for every key byte, starting from the caller's
+  // h; the high half is xored into the low half at the end.
+  const uint8_t * bytes = (const uint8_t*)key;
+  for(int pos = 0; pos < len; pos++)
+  {
+    h = 17 * h + (bytes[pos] - ' ');
+  }
+  return h ^ (h >> 16);
+}
+
+//-----------------------------------------------------------------------------
+
+void Bernstein ( const void * key, int len, uint32_t seed, void * out ) 
+{
+  // Times-33 hash: state = 33*state + byte for each key byte, starting from
+  // the seed. The final 32-bit state is stored to *out.
+  const uint8_t * bytes = (const uint8_t*)key;
+  uint32_t state = seed;
+  for(int pos = 0; pos < len; pos++)
+    state = 33 * state + bytes[pos];
+
+  *(uint32_t*)out = state;
+}
+
+//-----------------------------------------------------------------------------
+// Crap8 hash from http://www.team5150.com/~andrew/noncryptohashzoo/Crap8.html
+
+uint32_t Crap8( const uint8_t *key, uint32_t len, uint32_t seed ) {
+  #define c8fold( a, b, y, z ) { p = (uint32_t)(a) * (uint64_t)(b); y ^= (uint32_t)p; z ^= (uint32_t)(p >> 32); } // 64-bit product a*b: low half xored into y, high half into z
+  #define c8mix( in ) { h *= m; c8fold( in, m, k, h ); } // one 32-bit input word: scale h, then fold in*m into k and h
+
+  const uint32_t m = 0x83d2e73b, n = 0x97e1cc59, *key4 = (const uint32_t *)key;
+  uint32_t h = len + seed, k = n + len;
+  uint64_t p;  // scratch for the 64-bit product inside c8fold
+
+  while ( len >= 8 ) { c8mix(key4[0]) c8mix(key4[1]) key4 += 2; len -= 8; }
+  if ( len >= 4 ) { c8mix(key4[0]) key4 += 1; len -= 4; }
+  if ( len ) { c8mix( key4[0] & ( ( 1 << ( len * 8 ) ) - 1 ) ) } // 1..3 bytes left: loads a full word and masks it; NOTE(review): the load itself can touch bytes past the end of the key
+  c8fold( h ^ k, n, k, k )
+  return k;
+}
+
+void Crap8_test ( const void * key, int len, uint32_t seed, void * out )
+{  // adapter with the (key,len,seed,out) signature: stores the 32-bit Crap8 result through out
+  *(uint32_t*)out = Crap8((const uint8_t*)key,len,seed);
+}
diff --git a/Hashes.h b/Hashes.h
new file mode 100644
index 0000000..6c04ae1
--- /dev/null
+++ b/Hashes.h
@@ -0,0 +1,78 @@
+#pragma once
+
+#include "Types.h"
+
+#include "MurmurHash1.h"
+#include "MurmurHash2.h"
+#include "MurmurHash3.h"
+
+//----------
+// These are _not_ hash functions (even though people tend to use crc32 as one...)
+
+void sumhash               ( const void * key, int len, uint32_t seed, void * out );
+void sumhash32             ( const void * key, int len, uint32_t seed, void * out );
+
+void DoNothingHash         ( const void * key, int len, uint32_t seed, void * out );
+void crc32                 ( const void * key, int len, uint32_t seed, void * out );
+
+void randhash_32           ( const void * key, int len, uint32_t seed, void * out );
+void randhash_64           ( const void * key, int len, uint32_t seed, void * out );
+void randhash_128          ( const void * key, int len, uint32_t seed, void * out );
+
+//----------
+// Cryptographic hashes
+
+void md5_32                ( const void * key, int len, uint32_t seed, void * out );
+void sha1_32a              ( const void * key, int len, uint32_t seed, void * out );
+
+//----------
+// General purpose hashes
+
+void FNV                   ( const void * key, int len, uint32_t seed, void * out );
+void Bernstein             ( const void * key, int len, uint32_t seed, void * out );
+void SuperFastHash         ( const void * key, int len, uint32_t seed, void * out );
+void lookup3_test          ( const void * key, int len, uint32_t seed, void * out );
+void MurmurOAAT_test       ( const void * key, int len, uint32_t seed, void * out );
+void Crap8_test            ( const void * key, int len, uint32_t seed, void * out );
+void CityHash128_test      ( const void * key, int len, uint32_t seed, void * out );
+void CityHash64_test       ( const void * key, int len, uint32_t seed, void * out );
+
+void SpookyHash32_test     ( const void * key, int len, uint32_t seed, void * out );
+void SpookyHash64_test     ( const void * key, int len, uint32_t seed, void * out );
+void SpookyHash128_test    ( const void * key, int len, uint32_t seed, void * out );
+
+uint32_t MurmurOAAT ( const void * key, int len, uint32_t seed );
+
+//----------
+// MurmurHash2
+
+void MurmurHash2_test      ( const void * key, int len, uint32_t seed, void * out );
+void MurmurHash2A_test     ( const void * key, int len, uint32_t seed, void * out );
+
+//-----------------------------------------------------------------------------
+// Test harnesses for Murmur1/2
+
+// Harness adapter: stores the 32-bit MurmurHash1 of (key,len,seed) in 'out'.
+
+inline void MurmurHash1_test ( const void * key, int len, uint32_t seed, void * out )
+{
+  uint32_t * result = (uint32_t*)out;
+
+  *result = MurmurHash1(key,len,seed);
+}
+
+// Harness adapter: stores the 32-bit MurmurHash2 of (key,len,seed) in 'out'.
+
+inline void MurmurHash2_test ( const void * key, int len, uint32_t seed, void * out )
+{
+  uint32_t * result = (uint32_t*)out;
+
+  *result = MurmurHash2(key,len,seed);
+}
+
+// Harness adapter: stores the 32-bit MurmurHash2A of (key,len,seed) in 'out'.
+
+inline void MurmurHash2A_test ( const void * key, int len, uint32_t seed, void * out )
+{
+  uint32_t * result = (uint32_t*)out;
+
+  *result = MurmurHash2A(key,len,seed);
+}
+
+// Harness adapter: stores the MurmurHash64A of (key,len,seed) in 'out' as a
+// 64-bit value.
+
+inline void MurmurHash64A_test ( const void * key, int len, uint32_t seed, void * out )
+{
+  uint64_t * result = (uint64_t*)out;
+
+  *result = MurmurHash64A(key,len,seed);
+}
+
+// Harness adapter: stores the MurmurHash64B of (key,len,seed) in 'out' as a
+// 64-bit value.
+
+inline void MurmurHash64B_test ( const void * key, int len, uint32_t seed, void * out )
+{
+  uint64_t * result = (uint64_t*)out;
+
+  *result = MurmurHash64B(key,len,seed);
+}
diff --git a/KeysetTest.cpp b/KeysetTest.cpp
new file mode 100644
index 0000000..7077277
--- /dev/null
+++ b/KeysetTest.cpp
@@ -0,0 +1,330 @@
+#include "KeysetTest.h"
+
+#include "Platform.h"
+#include "Random.h"
+
+#include <map>
+#include <set>
+
+//-----------------------------------------------------------------------------
+// This should hopefully be a thorough and unambiguous test of whether a hash
+// is correctly implemented on a given platform
+
+// Computes a 32-bit verification value for 'hash' and compares it with
+// 'expected'.
+//
+//   hash     - hash function under test
+//   hashbits - width of the hash output in bits
+//   expected - verification value to compare against
+//   verbose  - when true, print the computed value and the verdict
+//
+// Returns true when the computed value equals 'expected'.
+
+bool VerificationTest ( pfHash hash, const int hashbits, uint32_t expected, bool verbose )
+{
+  const int hashbytes = hashbits / 8;
+
+  // The verification value is built from four bytes of the final hash, so the
+  // buffer always holds at least four (zeroed) bytes, even when the hash
+  // itself is narrower than 32 bits.
+
+  const int finalbytes = (hashbytes < 4) ? 4 : hashbytes;
+
+  uint8_t * key    = new uint8_t[256];
+  uint8_t * hashes = new uint8_t[hashbytes * 256];
+  uint8_t * final  = new uint8_t[finalbytes];
+
+  memset(key,0,256);
+  memset(hashes,0,hashbytes*256);
+  memset(final,0,finalbytes);
+
+  // Hash keys of the form {0}, {0,1}, {0,1,2}... up to N=255,using 256-N as
+  // the seed
+
+  for(int i = 0; i < 256; i++)
+  {
+    key[i] = (uint8_t)i;
+
+    hash(key,i,256-i,&hashes[i*hashbytes]);
+  }
+
+  // Then hash the result array
+
+  hash(hashes,hashbytes*256,0,final);
+
+  // The first four bytes of that hash, interpreted as a little-endian integer, is our
+  // verification value. Each byte is widened to uint32_t before shifting so
+  // that the top byte is never shifted into the sign bit of a promoted int.
+
+  const uint32_t verification = ((uint32_t)final[0] <<  0)
+                              | ((uint32_t)final[1] <<  8)
+                              | ((uint32_t)final[2] << 16)
+                              | ((uint32_t)final[3] << 24);
+
+  delete [] key;
+  delete [] hashes;
+  delete [] final;
+
+  //----------
+
+  if(expected != verification)
+  {
+    if(verbose) printf("Verification value 0x%08X : Failed! (Expected 0x%08x)\n",verification,expected);
+    return false;
+  }
+  else
+  {
+    if(verbose) printf("Verification value 0x%08X : Passed!\n",verification);
+    return true;
+  }
+}
+
+//----------------------------------------------------------------------------
+// Basic sanity checks -
+
+// A hash function should not be reading outside the bounds of the key.
+
+// Flipping a bit of a key should, with overwhelmingly high probability,
+// result in a different hash.
+
+// Hashing the same key twice should always produce the same result.
+
+// The memory alignment of the key should not affect the hash result.
+
+// Runs the bit-flip sanity check on 'hash'.
+//
+// For every key length from 4 to 256 bytes, a random key is hashed in place
+// and a copy of it is hashed from 16 different offsets inside a second,
+// independently randomised buffer. Each bit of the copy is flipped (the hash
+// must change) and flipped back (the hash must match the original again).
+//
+//   hash     - hash function under test
+//   hashbits - width of the hash output in bits
+//
+// Returns true when no check failed.
+
+bool SanityTest ( pfHash hash, const int hashbits )
+{
+  printf("Running sanity check 1");
+  
+  Rand r(883741);
+
+  bool result = true;
+
+  const int hashbytes = hashbits/8;
+  const int reps = 10;
+  const int keymax = 256;
+  const int pad = 16;
+  const int buflen = keymax + pad*3;
+  
+  uint8_t * buffer1 = new uint8_t[buflen];
+  uint8_t * buffer2 = new uint8_t[buflen];
+
+  uint8_t * hash1 = new uint8_t[hashbytes];
+  uint8_t * hash2 = new uint8_t[hashbytes];
+
+  //----------
+  
+  for(int irep = 0; irep < reps; irep++)
+  {
+    if(irep % (reps/10) == 0) printf(".");
+
+    for(int len = 4; len <= keymax; len++)
+    {
+      // 'shift' moves the copy through 'pad' consecutive start addresses.
+
+      for(int shift = 0; shift < pad; shift++)
+      {
+        uint8_t * key1 = buffer1 + pad;
+        uint8_t * key2 = buffer2 + pad*2 + shift;
+
+        // Fresh random bytes everywhere, so the bytes surrounding the two
+        // copies of the key differ.
+
+        r.rand_p(buffer1,buflen);
+        r.rand_p(buffer2,buflen);
+
+        memcpy(key2,key1,len);
+
+        hash(key1,len,0,hash1);
+
+        const int keybits = len * 8;
+
+        for(int bit = 0; bit < keybits; bit++)
+        {
+          // Flip a bit, hash the key -> we should get a different result.
+
+          flipbit(key2,len,bit);
+          hash(key2,len,0,hash2);
+
+          const bool sameAfterFlip = (memcmp(hash1,hash2,hashbytes) == 0);
+
+          // Flip it back, hash again -> we should get the original result.
+
+          flipbit(key2,len,bit);
+          hash(key2,len,0,hash2);
+
+          const bool differsAfterRestore = (memcmp(hash1,hash2,hashbytes) != 0);
+
+          if(sameAfterFlip || differsAfterRestore)
+          {
+            result = false;
+          }
+        }
+      }
+    }
+  }
+
+  if(result)
+  {
+    printf("PASS\n");
+  }
+  else
+  {
+    printf("*********FAIL*********\n");
+  }
+
+  delete [] hash2;
+  delete [] hash1;
+
+  delete [] buffer2;
+  delete [] buffer1;
+
+  return result;
+}
+
+//----------------------------------------------------------------------------
+// Appending zero bytes to a key should always cause it to produce a different
+// hash value
+
+// Runs 100 rounds; each round hashes a 32-byte random key followed by 0..31
+// zero bytes and fails as soon as a hash equals the hash of the previous
+// (one byte shorter) key. The very first hash of a round is compared against
+// an all-zero value.
+//
+//   hash     - hash function under test
+//   hashbits - width of the hash output in bits
+
+void AppendedZeroesTest ( pfHash hash, const int hashbits )
+{
+  printf("Running sanity check 2");
+  
+  Rand r(173994);
+
+  const int hashbytes = hashbits/8;
+
+  const int randombytes = 32;
+  const int maxzeroes = 32;
+
+  for(int rep = 0; rep < 100; rep++)
+  {
+    if(rep % 10 == 0) printf(".");
+
+    // Random prefix, everything after it stays zero.
+
+    unsigned char key[256];
+
+    memset(key,0,sizeof(key));
+
+    r.rand_p(key,randombytes);
+
+    uint32_t current[16];
+    uint32_t previous[16];
+
+    memset(current,0,hashbytes);
+    memset(previous,0,hashbytes);
+
+    for(int keylen = randombytes; keylen < randombytes + maxzeroes; keylen++)
+    {
+      hash(key,keylen,0,current);
+
+      const bool unchanged = (memcmp(current,previous,hashbytes) == 0);
+
+      if(unchanged)
+      {
+        printf("\n*********FAIL*********\n");
+        return;
+      }
+
+      memcpy(previous,current,hashbytes);
+    }
+  }
+
+  printf("PASS\n");
+}
+
+//-----------------------------------------------------------------------------
+// Generate all keys of up to N bytes containing two non-zero bytes
+
+// Feeds every key of 2..maxlen bytes that has exactly one or exactly two
+// non-zero bytes to the callback 'c', after reserving room for all of them.
+// The keys are built in a 256-byte scratch buffer.
+
+void TwoBytesKeygen ( int maxlen, KeyCallback & c )
+{
+  //----------
+  // Compute # of keys
+
+  int pairslots = 0;   // ways to place two non-zero bytes, over all lengths
+  int singlekeys = 0;  // keys with one non-zero byte, over all lengths
+
+  for(int len = 2; len <= maxlen; len++)
+  {
+    pairslots += (int)chooseK(len,2);
+    singlekeys += len*255;
+  }
+
+  const int keycount = pairslots * (255*255) + singlekeys;
+
+  printf("Keyset 'TwoBytes' - up-to-%d-byte keys, %d total keys\n",maxlen, keycount);
+
+  c.reserve(keycount);
+
+  uint8_t key[256];
+
+  memset(key,0,256);
+
+  //----------
+  // Add all keys with one non-zero byte
+
+  for(int keylen = 2; keylen <= maxlen; keylen++)
+  {
+    for(int pos = 0; pos < keylen; pos++)
+    {
+      for(int val = 1; val <= 255; val++)
+      {
+        key[pos] = (uint8_t)val;
+
+        c(key,keylen);
+      }
+
+      // Restore the all-zero key before moving to the next position.
+
+      key[pos] = 0;
+    }
+  }
+
+  //----------
+  // Add all keys with two non-zero bytes
+
+  for(int keylen = 2; keylen <= maxlen; keylen++)
+  {
+    for(int posA = 0; posA < keylen-1; posA++)
+    {
+      for(int posB = posA+1; posB < keylen; posB++)
+      {
+        for(int valA = 1; valA <= 255; valA++)
+        {
+          key[posA] = (uint8_t)valA;
+
+          for(int valB = 1; valB <= 255; valB++)
+          {
+            key[posB] = (uint8_t)valB;
+
+            c(key,keylen);
+          }
+
+          key[posB] = 0;
+        }
+
+        key[posA] = 0;
+      }
+    }
+  }
+}
+
+//-----------------------------------------------------------------------------
+
+// Prints every entry of 'cmap': the hash value first, then one line per key
+// stored under that hash, then an empty line.
+
+template< typename hashtype >
+void DumpCollisionMap ( CollisionMap<hashtype,ByteVec> & cmap )
+{
+  typedef CollisionMap<hashtype,ByteVec> cmap_t;
+  typedef std::vector<ByteVec> keylist_t;
+
+  typename cmap_t::iterator entry = cmap.begin();
+
+  while(entry != cmap.end())
+  {
+    const hashtype & hash = entry->first;
+
+    printf("Hash - ");
+    printbytes(&hash,sizeof(hashtype));
+    printf("\n");
+
+    keylist_t & keys = entry->second;
+
+    const int keycount = (int)keys.size();
+
+    for(int n = 0; n < keycount; n++)
+    {
+      ByteVec & key = keys[n];
+
+      printf("Key  - ");
+      printbytes(&key[0],(int)key.size());
+      printf("\n");
+    }
+
+    printf("\n");
+
+    ++entry;
+  }
+}
+
+// test code
+
+// Hashes the 'TwoBytes' keyset (keys of up to 20 bytes) with 'hash', collects
+// the colliding hash values, generates the keyset a second time to map those
+// values back to their keys, and prints the resulting collision map.
+
+void ReportCollisions ( pfHash hash )
+{
+  const int maxlen = 20;
+
+  // Pass 1 - hash every key.
+
+  printf("Hashing keyset\n");
+
+  std::vector<uint128_t> hashes;
+
+  HashCallback<uint128_t> collect(hash,hashes);
+
+  TwoBytesKeygen(maxlen,collect);
+
+  printf("%d hashes\n",(int)hashes.size());
+
+  // Pick out the hash values that collide.
+
+  printf("Finding collisions\n");
+
+  HashSet<uint128_t> collisions;
+
+  FindCollisions(hashes,collisions,1000);
+
+  printf("%d collisions\n",(int)collisions.size());
+
+  // Pass 2 - regenerate the keys and record the ones behind each collision.
+
+  printf("Mapping collisions\n");
+
+  CollisionMap<uint128_t,ByteVec> cmap;
+
+  CollisionCallback<uint128_t> record(hash,collisions,cmap);
+
+  TwoBytesKeygen(maxlen,record);
+
+  printf("Dumping collisions\n");
+
+  DumpCollisionMap(cmap);
+}
diff --git a/KeysetTest.h b/KeysetTest.h
new file mode 100644
index 0000000..dce54d2
--- /dev/null
+++ b/KeysetTest.h
@@ -0,0 +1,439 @@
+//-----------------------------------------------------------------------------
+// Keyset tests generate various sorts of difficult-to-hash keysets and compare
+// the distribution and collision frequency of the hash results against an
+// ideal random distribution
+
+// The sanity checks are also in this cpp/h
+
+#pragma once
+
+#include "Types.h"
+#include "Stats.h"
+#include "Random.h"   // for rand_p
+
+#include <algorithm>  // for std::swap
+#include <assert.h>
+
+//-----------------------------------------------------------------------------
+// Sanity tests
+
+bool VerificationTest   ( pfHash hash, const int hashbits, uint32_t expected, bool verbose );
+bool SanityTest         ( pfHash hash, const int hashbits );
+void AppendedZeroesTest ( pfHash hash, const int hashbits );
+
+//-----------------------------------------------------------------------------
+// Keyset 'Combination' - all possible combinations of input blocks
+
+// Depth-first generator for the 'Combination' keyset. 'key' already holds
+// 'len' blocks; every block from 'blocks' is tried as the next one, the
+// extended key is hashed (seed 0) and appended to 'hashes', and the search
+// continues below it until the key is 'maxlen' blocks long.
+
+template< typename hashtype >
+void CombinationKeygenRecurse ( uint32_t * key, int len, int maxlen, 
+                  uint32_t * blocks, int blockcount, 
+                pfHash hash, std::vector<hashtype> & hashes )
+{
+  if(len != maxlen)
+  {
+    const int depth = len + 1;
+
+    for(int b = 0; b < blockcount; b++)
+    {
+      key[len] = blocks[b];
+
+      // Keys of every intermediate length are hashed, not just full-length ones.
+
+      hashtype h;
+      hash(key,depth * sizeof(uint32_t),0,&h);
+      hashes.push_back(h);
+
+      CombinationKeygenRecurse(key,depth,maxlen,blocks,blockcount,hash,hashes);
+    }
+  }
+}
+
+// Keyset 'Combination' driver: hashes every key of 1..maxlen blocks drawn
+// from 'blocks' and hands the hashes to TestHashList.
+//
+// Returns the verdict of TestHashList.
+
+template< typename hashtype >
+bool CombinationKeyTest ( hashfunc<hashtype> hash, int maxlen, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram )
+{
+  printf("Keyset 'Combination' - up to %d blocks from a set of %d - ",maxlen,blockcount);
+
+  //----------
+  // Generate the hashes; the scratch key only lives for the generation step.
+
+  std::vector<hashtype> hashes;
+
+  uint32_t * scratch = new uint32_t[maxlen];
+
+  CombinationKeygenRecurse<hashtype>(scratch,0,maxlen,blocks,blockcount,hash,hashes);
+
+  delete [] scratch;
+
+  const int total = (int)hashes.size();
+
+  printf("%d keys\n",total);
+
+  //----------
+  // Evaluate them.
+
+  bool ok = true;
+
+  ok &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
+  
+  printf("\n");
+
+  return ok;
+}
+
+//----------------------------------------------------------------------------
+// Keyset 'Permutation' - given a set of 32-bit blocks, generate keys
+// consisting of all possible permutations of those blocks
+
+// Generates the orderings of blocks[k..blockcount-1] by swapping each
+// candidate into position k and recursing. Once k reaches the last position
+// the whole block array is hashed (seed 0) and the hash appended to 'hashes'.
+// Every swap is undone after the recursive call returns.
+
+template< typename hashtype >
+void PermutationKeygenRecurse ( pfHash hash, uint32_t * blocks, int blockcount, int k, std::vector<hashtype> & hashes )
+{
+  const bool lastPosition = (k == blockcount-1);
+
+  if(!lastPosition)
+  {
+    for(int pick = k; pick < blockcount; pick++)
+    {
+      std::swap(blocks[k],blocks[pick]);
+
+      PermutationKeygenRecurse(hash,blocks,blockcount,k+1,hashes);
+
+      std::swap(blocks[k],blocks[pick]);
+    }
+
+    return;
+  }
+
+  hashtype h;
+
+  hash(blocks,blockcount * sizeof(uint32_t),0,&h);
+
+  hashes.push_back(h);
+}
+
+// Keyset 'Permutation' driver: hashes the orderings of 'blocks' produced by
+// PermutationKeygenRecurse and hands the hashes to TestHashList.
+//
+// Returns the verdict of TestHashList.
+
+template< typename hashtype >
+bool PermutationKeyTest ( hashfunc<hashtype> hash, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram )
+{
+  printf("Keyset 'Permutation' - %d blocks - ",blockcount);
+
+  //----------
+  // Generate the hashes, starting the permutation at position 0.
+
+  std::vector<hashtype> hashes;
+
+  PermutationKeygenRecurse<hashtype>(hash,blocks,blockcount,0,hashes);
+
+  const int total = (int)hashes.size();
+
+  printf("%d keys\n",total);
+
+  //----------
+  // Evaluate them.
+
+  bool ok = true;
+
+  ok &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
+  
+  printf("\n");
+
+  return ok;
+}
+
+//-----------------------------------------------------------------------------
+// Keyset 'Sparse' - generate all possible N-bit keys with up to K bits set
+
+// Recursive generator for the 'Sparse' keyset. Sets each bit from 'start'
+// upwards in 'k' in turn; the key is hashed (seed 0) either at every depth
+// ('inclusive') or only once the last allowed bit has been placed
+// (bitsleft == 1). While more bits may be set, recursion continues with the
+// bits above the current one. Each bit is cleared again before the next one
+// is tried, so 'k' is left unchanged on return.
+
+template < typename keytype, typename hashtype >
+void SparseKeygenRecurse ( pfHash hash, int start, int bitsleft, bool inclusive, keytype & k, std::vector<hashtype> & hashes )
+{
+  const int nbytes = sizeof(keytype);
+  const int nbits = nbytes * 8;
+
+  // Both decisions are the same for every bit tried at this depth.
+
+  const bool emit = inclusive || (bitsleft == 1);
+  const bool descend = (bitsleft > 1);
+
+  hashtype h;
+
+  for(int bit = start; bit < nbits; bit++)
+  {
+    flipbit(&k,nbytes,bit);
+
+    if(emit)
+    {
+      hash(&k,sizeof(keytype),0,&h);
+      hashes.push_back(h);
+    }
+
+    if(descend)
+    {
+      SparseKeygenRecurse(hash,bit+1,bitsleft-1,inclusive,k,hashes);
+    }
+
+    flipbit(&k,nbytes,bit);
+  }
+}
+
+//----------
+
+// Keyset 'Sparse' driver: hashes the keybits-wide keys with exactly 'setbits'
+// bits set, or with 0..setbits bits set when 'inclusive', and hands the
+// hashes to TestHashList.
+//
+// Returns the verdict of TestHashList.
+
+template < int keybits, typename hashtype >
+bool SparseKeyTest ( hashfunc<hashtype> hash, const int setbits, bool inclusive, bool testColl, bool testDist, bool drawDiagram  )
+{
+  const char * mode = inclusive ? "up to" : "exactly";
+
+  printf("Keyset 'Sparse' - %d-bit keys with %s %d bits set - ",keybits, mode, setbits);
+
+  typedef Blob<keybits> keytype;
+
+  std::vector<hashtype> hashes;
+
+  // Start from the all-zero key.
+
+  keytype k;
+  memset(&k,0,sizeof(k));
+
+  // The all-zero key itself only belongs to the inclusive keyset.
+
+  if(inclusive)
+  {
+    hashtype h;
+
+    hash(&k,sizeof(keytype),0,&h);
+
+    hashes.push_back(h);
+  }
+
+  SparseKeygenRecurse(hash,0,setbits,inclusive,k,hashes);
+
+  const int total = (int)hashes.size();
+
+  printf("%d keys\n",total);
+
+  bool ok = true;
+  
+  ok &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
+
+  printf("\n");
+
+  return ok;
+}
+
+//-----------------------------------------------------------------------------
+// Keyset 'Windows' - for all possible N-bit windows of a K-bit key, generate
+// all possible keys with bits set in that window
+
+// Keyset 'Windowed' driver: for each window position, hashes all
+// 2^windowbits keys formed by taking the values 0..2^windowbits-1 and moving
+// them to that position with lrot, then runs TestHashList on the batch.
+//
+// Returns true only if TestHashList passed for every window position.
+
+template < typename keytype, typename hashtype >
+bool WindowedKeyTest ( hashfunc<hashtype> hash, const int windowbits, bool testCollision, bool testDistribution, bool drawDiagram )
+{
+  const int keybits = sizeof(keytype) * 8;
+  const int keycount = 1 << windowbits;
+  const int testcount = keybits;
+
+  // One hash slot per key; the same storage is reused for every window.
+
+  std::vector<hashtype> hashes;
+  hashes.resize(keycount);
+
+  bool ok = true;
+
+  printf("Keyset 'Windowed' - %3d-bit key, %3d-bit window - %d tests, %d keys per test\n",keybits,windowbits,testcount,keycount);
+
+  // NOTE(review): the bound is inclusive, so testcount+1 windows are run
+  // although the banner above announces testcount - confirm this is intended.
+
+  for(int window = 0; window <= testcount; window++)
+  {
+    keytype key;
+
+    for(int value = 0; value < keycount; value++)
+    {
+      key = value;
+
+      lrot(&key,sizeof(keytype),window);
+
+      hash(&key,sizeof(keytype),0,&hashes[value]);
+    }
+
+    printf("Window at %3d - ",window);
+
+    ok &= TestHashList(hashes,testCollision,testDistribution,drawDiagram);
+  }
+
+  return ok;
+}
+
+//-----------------------------------------------------------------------------
+// Keyset 'Cyclic' - generate keys that consist solely of N repetitions of M
+// bytes.
+
+// (This keyset type is designed to make MurmurHash2 fail)
+
+// Keyset 'Cyclic' driver: builds 'keycount' keys, each made of 'cycleReps'
+// copies of a 'cycleLen'-byte cycle, hashes them (seed 0) and runs
+// TestHashList with both of its test flags set to true.
+//
+// Returns the verdict of TestHashList.
+
+template < typename hashtype >
+bool CyclicKeyTest ( pfHash hash, int cycleLen, int cycleReps, const int keycount, bool drawDiagram )
+{
+  printf("Keyset 'Cyclic' - %d cycles of %d bytes - %d keys\n",cycleReps,cycleLen,keycount);
+
+  Rand r(483723);
+
+  std::vector<hashtype> hashes;
+  hashes.resize(keycount);
+
+  const int keyLen = cycleLen * cycleReps;
+
+  // The cycle buffer carries 16 spare bytes beyond cycleLen.
+
+  uint8_t * cycle = new uint8_t[cycleLen + 16];
+  uint8_t * key = new uint8_t[keyLen];
+
+  //----------
+
+  for(int n = 0; n < keycount; n++)
+  {
+    // Random cycle whose first four bytes are replaced by a mix of the key index.
+
+    r.rand_p(cycle,cycleLen);
+
+    *(uint32_t*)cycle = f3mix(n ^ 0x746a94f1);
+
+    // Lay the cycle down cycleReps times back to back.
+
+    uint8_t * dst = key;
+
+    for(int rep = 0; rep < cycleReps; rep++)
+    {
+      for(int b = 0; b < cycleLen; b++)
+      {
+        *dst++ = cycle[b];
+      }
+    }
+
+    hash(key,keyLen,0,&hashes[n]);
+  }
+
+  //----------
+  
+  bool ok = true;
+
+  ok &= TestHashList(hashes,true,true,drawDiagram);
+  printf("\n");
+
+  delete [] cycle;
+  delete [] key;
+
+  return ok;
+}
+
+//-----------------------------------------------------------------------------
+// Keyset 'TwoBytes' - generate all keys up to length N with two non-zero bytes
+
+void TwoBytesKeygen ( int maxlen, KeyCallback & c );
+
+// Keyset 'TwoBytes' driver: collects the hashes of every key produced by
+// TwoBytesKeygen(maxlen) and runs TestHashList with both of its test flags
+// set to true.
+//
+// Returns the verdict of TestHashList.
+
+template < typename hashtype >
+bool TwoBytesTest2 ( pfHash hash, int maxlen, bool drawDiagram )
+{
+  std::vector<hashtype> hashes;
+
+  // The callback hashes each generated key into 'hashes'.
+
+  HashCallback<hashtype> collect(hash,hashes);
+
+  TwoBytesKeygen(maxlen,collect);
+
+  bool ok = true;
+
+  ok &= TestHashList(hashes,true,true,drawDiagram);
+  printf("\n");
+
+  return ok;
+}
+
+//-----------------------------------------------------------------------------
+// Keyset 'Text' - generate all keys of the form "prefix"+"core"+"suffix",
+// where "core" consists of all possible combinations of the given character
+// set of length N.
+
+// Keyset 'Text' driver: hashes every key of the form prefix + core + suffix,
+// where the core runs through all corelen-character strings over the
+// characters of 'coreset', then runs TestHashList on the hashes.
+//
+//   hash        - hash function under test
+//   prefix      - text placed before the core
+//   coreset     - characters the core is built from
+//   corelen     - number of core characters
+//   suffix      - text placed after the core
+//   drawDiagram - forwarded to TestHashList
+//
+// Returns the verdict of TestHashList.
+
+template < typename hashtype >
+bool TextKeyTest ( hashfunc<hashtype> hash, const char * prefix, const char * coreset, const int corelen, const char * suffix, bool drawDiagram )
+{
+  const int prefixlen = (int)strlen(prefix);
+  const int suffixlen = (int)strlen(suffix);
+  const int corecount = (int)strlen(coreset);
+
+  const int keybytes = prefixlen + corelen + suffixlen;
+
+  // corecount^corelen, computed with integer arithmetic. Going through
+  // pow() and truncating the double can yield one key too few when the
+  // floating-point result lands just below the exact integer.
+
+  int corepower = 1;
+  for(int i = 0; i < corelen; i++) corepower *= corecount;
+
+  const int keycount = corepower;
+
+  printf("Keyset 'Text' - keys of form \"%s[",prefix);
+  for(int i = 0; i < corelen; i++) printf("X");
+  printf("]%s\" - %d keys\n",suffix,keycount);
+
+  // One extra byte keeps the key buffer zero-terminated.
+
+  uint8_t * key = new uint8_t[keybytes+1];
+
+  key[keybytes] = 0;
+
+  memcpy(key,prefix,prefixlen);
+  memcpy(key+prefixlen+corelen,suffix,suffixlen);
+
+  //----------
+
+  std::vector<hashtype> hashes;
+  hashes.resize(keycount);
+
+  for(int i = 0; i < keycount; i++)
+  {
+    // Write i as a corelen-digit number in base corecount, least significant
+    // digit first, using the coreset characters as digits.
+
+    int t = i;
+
+    for(int j = 0; j < corelen; j++)
+    {
+      key[prefixlen+j] = coreset[t % corecount]; t /= corecount;
+    }
+
+    hash(key,keybytes,0,&hashes[i]);
+  }
+
+  //----------
+
+  bool result = true;
+
+  result &= TestHashList(hashes,true,true,drawDiagram);
+
+  printf("\n");
+
+  delete [] key;
+
+  return result;
+}
+
+//-----------------------------------------------------------------------------
+// Keyset 'Zeroes' - keys consisting of all zeroes, differing only in length
+
+// We reuse one block of empty bytes, otherwise the RAM cost is enormous.
+
+// Keyset 'Zeroes' driver: hashes all-zero keys of every length from 0 to
+// 64K-1 bytes (seed 0), all read from one shared zero-filled block, and runs
+// TestHashList with both of its test flags set to true.
+//
+// Returns the verdict of TestHashList.
+
+template < typename hashtype >
+bool ZeroKeyTest ( pfHash hash, bool drawDiagram )
+{
+  const int keycount = 64*1024;
+
+  printf("Keyset 'Zeroes' - %d keys\n",keycount);
+
+  // A single zero block serves as the source of every key.
+
+  unsigned char * zeroes = new unsigned char[keycount];
+  memset(zeroes,0,keycount);
+
+  //----------
+
+  std::vector<hashtype> hashes;
+
+  hashes.resize(keycount);
+
+  // Key number 'len' is simply the first 'len' bytes of the block.
+
+  for(int len = 0; len < keycount; len++)
+  {
+    hash(zeroes,len,0,&hashes[len]);
+  }
+
+  bool ok = true;
+
+  ok &= TestHashList(hashes,true,true,drawDiagram);
+
+  printf("\n");
+
+  delete [] zeroes;
+
+  return ok;
+}
+
+//-----------------------------------------------------------------------------
+// Keyset 'Seed' - hash "the quick brown fox..." using different seeds
+
+// Keyset 'Seed' driver: hashes one fixed sentence with the seeds
+// 0..keycount-1 and runs TestHashList with both of its test flags set to
+// true.
+//
+// Returns the verdict of TestHashList.
+
+template < typename hashtype >
+bool SeedTest ( pfHash hash, int keycount, bool drawDiagram )
+{
+  printf("Keyset 'Seed' - %d keys\n",keycount);
+
+  const char * text = "The quick brown fox jumps over the lazy dog";
+  const int len = (int)strlen(text);
+
+  //----------
+
+  std::vector<hashtype> hashes;
+
+  hashes.resize(keycount);
+
+  // Same key every time; only the seed changes.
+
+  for(int seed = 0; seed < keycount; seed++)
+  {
+    hash(text,len,seed,&hashes[seed]);
+  }
+
+  bool ok = true;
+
+  ok &= TestHashList(hashes,true,true,drawDiagram);
+
+  printf("\n");
+
+  return ok;
+}
+
+//-----------------------------------------------------------------------------
diff --git a/MurmurHash1.cpp b/MurmurHash1.cpp
new file mode 100644
index 0000000..8225566
--- /dev/null
+++ b/MurmurHash1.cpp
@@ -0,0 +1,174 @@
+//-----------------------------------------------------------------------------
+// MurmurHash was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+// Note - This code makes a few assumptions about how your machine behaves -
+
+// 1. We can read a 4-byte value from any address without crashing
+// 2. sizeof(int) == 4
+
+// And it has a few limitations -
+
+// 1. It will not work incrementally.
+// 2. It will not produce the same results on little-endian and big-endian
+//    machines.
+
+#include "MurmurHash1.h"
+
+//-----------------------------------------------------------------------------
+
+// MurmurHash1: adds the key to the state four bytes at a time (each word is
+// read directly from memory), multiplying by m and XOR-shifting by 16 after
+// every word, then adds the remaining 1-3 bytes and applies two final
+// multiply / XOR-shift rounds.
+//
+//   key  - bytes to hash
+//   len  - number of bytes in 'key'
+//   seed - XORed with len*m to form the initial state
+//
+// Returns the 32-bit hash.
+
+uint32_t MurmurHash1 ( const void * key, int len, uint32_t seed )
+{
+  const unsigned int m = 0xc6a4a793;
+
+  const int r = 16;
+
+  unsigned int h = seed ^ (len * m);
+
+  //----------
+  // Whole 4-byte words
+
+  const unsigned char * data = (const unsigned char *)key;
+
+  for(; len >= 4; len -= 4, data += 4)
+  {
+    h += *(const unsigned int *)data;
+    h *= m;
+    h ^= h >> r;
+  }
+
+  //----------
+  // Remaining 1-3 bytes, highest offset first
+
+  if(len > 0)
+  {
+    if(len == 3) h += data[2] << 16;
+    if(len >= 2) h += data[1] << 8;
+
+    h += data[0];
+    h *= m;
+    h ^= h >> r;
+  }
+
+  //----------
+  // Final mixing
+
+  h *= m;
+  h ^= h >> 10;
+  h *= m;
+  h ^= h >> 17;
+
+  return h;
+} 
+
+//-----------------------------------------------------------------------------
+// MurmurHash1Aligned, by Austin Appleby
+
+// Same algorithm as MurmurHash1, but only does aligned reads - should be safer
+// on certain platforms. 
+
+// Performance should be equal to or better than the simple version.
+
+// MurmurHash1Aligned: when 'key' is not 4-byte aligned (and at least 4 bytes
+// long), the bytes in front of the next 4-byte boundary are read one at a
+// time and all following words are read from aligned addresses; the input
+// words are then rebuilt by combining two neighbouring reads with shifts.
+//
+// The signature uses uint32_t so that this definition matches the
+// declaration in MurmurHash1.h on every platform; that header defines
+// uint32_t as 'unsigned long' under _MSC_VER, which is a different type
+// from 'unsigned int'.
+//
+//   key  - bytes to hash
+//   len  - number of bytes in 'key'
+//   seed - XORed with len*m to form the initial state
+//
+// Returns the 32-bit hash.
+
+uint32_t MurmurHash1Aligned ( const void * key, int len, uint32_t seed )
+{
+  const unsigned int m = 0xc6a4a793;
+  const int r = 16;
+
+  const unsigned char * data = (const unsigned char *)key;
+
+  unsigned int h = seed ^ (len * m);
+
+  // Offset of 'data' within its 4-byte word (0 means already aligned)
+
+  int align = (uint64_t)data & 3;
+
+  if(align && (len >= 4))
+  {
+    // Pre-load the temp registers
+
+    unsigned int t = 0, d = 0;
+
+    // Gather the 4-align bytes that precede the next aligned address
+
+    switch(align)
+    {
+      case 1: t |= data[2] << 16;
+      case 2: t |= data[1] << 8;
+      case 3: t |= data[0];
+    }
+
+    // Move them to the top of t, where the mix loop expects leftover bytes
+
+    t <<= (8 * align);
+
+    data += 4-align;
+    len -= 4-align;
+
+    // Each input word = top bytes of the previous read | low bytes of the next
+
+    int sl = 8 * (4-align);
+    int sr = 8 * align;
+
+    // Mix
+
+    while(len >= 4)
+    {
+      d = *(unsigned int *)data;
+      t = (t >> sr) | (d << sl);
+      h += t;
+      h *= m;
+      h ^= h >> r;
+      t = d;
+
+      data += 4;
+      len -= 4;
+    }
+
+    // Handle leftover data in temp registers
+
+    // At most 'align' further bytes are needed to complete the pending word
+
+    int pack = len < align ? len : align;
+
+    d = 0;
+
+    switch(pack)
+    {
+    case 3: d |= data[2] << 16;
+    case 2: d |= data[1] << 8;
+    case 1: d |= data[0];
+    case 0: h += (t >> sr) | (d << sl);
+        h *= m;
+        h ^= h >> r;
+    }
+
+    data += pack;
+    len -= pack;
+  }
+  else
+  {
+    // Aligned key, or a key shorter than 4 bytes: read whole words directly
+
+    while(len >= 4)
+    {
+      h += *(unsigned int *)data;
+      h *= m;
+      h ^= h >> r;
+
+      data += 4;
+      len -= 4;
+    }
+  }
+
+  //----------
+  // Handle tail bytes
+
+  switch(len)
+  {
+  case 3: h += data[2] << 16;
+  case 2: h += data[1] << 8;
+  case 1: h += data[0];
+      h *= m;
+      h ^= h >> r;
+  };
+
+  // Final mixing
+
+  h *= m;
+  h ^= h >> 10;
+  h *= m;
+  h ^= h >> 17;
+
+  return h;
+}
+
diff --git a/MurmurHash1.h b/MurmurHash1.h
new file mode 100644
index 0000000..93b08c3
--- /dev/null
+++ b/MurmurHash1.h
@@ -0,0 +1,34 @@
+//-----------------------------------------------------------------------------
+// MurmurHash1 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+#ifndef _MURMURHASH1_H_
+#define _MURMURHASH1_H_
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+typedef unsigned char uint8_t;
+typedef unsigned long uint32_t;
+typedef unsigned __int64 uint64_t;
+
+// Other compilers
+
+#else	// defined(_MSC_VER)
+
+#include <stdint.h>
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+
+uint32_t MurmurHash1        ( const void * key, int len, uint32_t seed );
+uint32_t MurmurHash1Aligned ( const void * key, int len, uint32_t seed );
+
+//-----------------------------------------------------------------------------
+
+#endif // _MURMURHASH1_H_
diff --git a/MurmurHash2.cpp b/MurmurHash2.cpp
new file mode 100644
index 0000000..cd1e53a
--- /dev/null
+++ b/MurmurHash2.cpp
@@ -0,0 +1,523 @@
+//-----------------------------------------------------------------------------
+// MurmurHash2 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+// Note - This code makes a few assumptions about how your machine behaves -
+
+// 1. We can read a 4-byte value from any address without crashing
+// 2. sizeof(int) == 4
+
+// And it has a few limitations -
+
+// 1. It will not work incrementally.
+// 2. It will not produce the same results on little-endian and big-endian
+//    machines.
+
+#include "MurmurHash2.h"
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+#define BIG_CONSTANT(x) (x)  // the literal is used exactly as written
+
+// Other compilers - an LLU suffix is pasted onto the literal
+
+#else	// defined(_MSC_VER)
+
+#define BIG_CONSTANT(x) (x##LLU)
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+// MurmurHash2 - hash 'len' bytes starting at 'key' into a 32-bit value.
+uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed )
+{
+  // 'm' and 'r' are mixing constants generated offline.
+  // They're not really 'magic', they just happen to work well.
+
+  const uint32_t m = 0x5bd1e995;
+  const int r = 24;
+
+  // Initialize the hash from the seed combined with the key length
+
+  uint32_t h = seed ^ len;
+
+  // Mix 4 bytes at a time into the hash
+
+  const unsigned char * data = (const unsigned char *)key;
+
+  while(len >= 4)
+  {
+    uint32_t k = *(uint32_t*)data;  // direct 32-bit load: native byte order, no alignment fix-up
+
+    k *= m;
+    k ^= k >> r;
+    k *= m;
+
+    h *= m;
+    h ^= k;
+
+    data += 4;
+    len -= 4;
+  }
+
+  // Handle the last 1-3 bytes of the input array; the cases fall through
+  // on purpose so that every remaining byte is folded in.
+  switch(len)
+  {
+  case 3: h ^= data[2] << 16;
+  case 2: h ^= data[1] << 8;
+  case 1: h ^= data[0];
+      h *= m;
+  };
+
+  // Do a few final mixes of the hash to ensure the last few
+  // bytes are well-incorporated.
+
+  h ^= h >> 13;
+  h *= m;
+  h ^= h >> 15;
+
+  return h;
+} 
+
+//-----------------------------------------------------------------------------
+// MurmurHash2, 64-bit versions, by Austin Appleby
+
+// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment 
+// and endian-ness issues if used across multiple platforms.
+
+// 64-bit hash for 64-bit platforms
+// Consumes the key as whole 64-bit words, then folds in up to 7 tail bytes.
+uint64_t MurmurHash64A ( const void * key, int len, uint64_t seed )
+{
+  const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995);
+  const int r = 47;
+
+  uint64_t h = seed ^ (len * m);  // starting state combines the seed with the key length
+
+  const uint64_t * data = (const uint64_t *)key;
+  const uint64_t * end = data + (len/8);  // one past the last complete 8-byte word
+
+  while(data != end)
+  {
+    uint64_t k = *data++;  // direct 64-bit load from the key buffer
+
+    k *= m; 
+    k ^= k >> r; 
+    k *= m; 
+    
+    h ^= k;
+    h *= m; 
+  }
+
+  const unsigned char * data2 = (const unsigned char*)data;  // byte view of what is left
+
+  switch(len & 7)  // count of leftover bytes; the cases fall through on purpose
+  {
+  case 7: h ^= uint64_t(data2[6]) << 48;
+  case 6: h ^= uint64_t(data2[5]) << 40;
+  case 5: h ^= uint64_t(data2[4]) << 32;
+  case 4: h ^= uint64_t(data2[3]) << 24;
+  case 3: h ^= uint64_t(data2[2]) << 16;
+  case 2: h ^= uint64_t(data2[1]) << 8;
+  case 1: h ^= uint64_t(data2[0]);
+          h *= m;
+  };
+ 
+  h ^= h >> r;  // final mixing of the accumulated state
+  h *= m;
+  h ^= h >> r;
+
+  return h;
+} 
+
+
+// 64-bit hash for 32-bit platforms
+// Keeps two 32-bit states (h1, h2) and joins them into one 64-bit result.
+uint64_t MurmurHash64B ( const void * key, int len, uint64_t seed )
+{
+  const uint32_t m = 0x5bd1e995;
+  const int r = 24;
+
+  uint32_t h1 = uint32_t(seed) ^ len;  // low half of the seed, combined with the length
+  uint32_t h2 = uint32_t(seed >> 32);  // high half of the seed
+
+  const uint32_t * data = (const uint32_t *)key;
+
+  while(len >= 8)  // each pass feeds one word to h1 and the next word to h2
+  {
+    uint32_t k1 = *data++;
+    k1 *= m; k1 ^= k1 >> r; k1 *= m;
+    h1 *= m; h1 ^= k1;
+    len -= 4;
+
+    uint32_t k2 = *data++;
+    k2 *= m; k2 ^= k2 >> r; k2 *= m;
+    h2 *= m; h2 ^= k2;
+    len -= 4;
+  }
+
+  if(len >= 4)  // one more whole word remains; it goes to h1
+  {
+    uint32_t k1 = *data++;
+    k1 *= m; k1 ^= k1 >> r; k1 *= m;
+    h1 *= m; h1 ^= k1;
+    len -= 4;
+  }
+
+  switch(len)  // last 1-3 bytes go to h2; the cases fall through on purpose
+  {
+  case 3: h2 ^= ((unsigned char*)data)[2] << 16;
+  case 2: h2 ^= ((unsigned char*)data)[1] << 8;
+  case 1: h2 ^= ((unsigned char*)data)[0];
+      h2 *= m;
+  };
+
+  h1 ^= h2 >> 18; h1 *= m;  // cross-mix the two halves
+  h2 ^= h1 >> 22; h2 *= m;
+  h1 ^= h2 >> 17; h1 *= m;
+  h2 ^= h1 >> 19; h2 *= m;
+
+  uint64_t h = h1;
+
+  h = (h << 32) | h2;  // h1 becomes the upper 32 bits, h2 the lower 32 bits
+
+  return h;
+} 
+
+//-----------------------------------------------------------------------------
+// MurmurHash2A, by Austin Appleby
+
+// This is a variant of MurmurHash2 modified to use the Merkle-Damgard 
+// construction. Bulk speed should be identical to Murmur2, small-key speed 
+// will be 10%-20% slower due to the added overhead at the end of the hash.
+
+// This variant fixes a minor issue where null keys were more likely to
+// collide with each other than expected, and also makes the function
+// more amenable to incremental implementations.
+
+#define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }  // takes 'm' and 'r' from the enclosing scope; changes both h and k
+// MurmurHash2A - mixes the tail word and the length in as two extra blocks.
+uint32_t MurmurHash2A ( const void * key, int len, uint32_t seed )
+{
+  const uint32_t m = 0x5bd1e995;
+  const int r = 24;
+  uint32_t l = len;  // copy of the original length, mixed in after the data
+
+  const unsigned char * data = (const unsigned char *)key;
+
+  uint32_t h = seed;
+
+  while(len >= 4)
+  {
+    uint32_t k = *(uint32_t*)data;  // direct 32-bit load from the key buffer
+
+    mmix(h,k);
+
+    data += 4;
+    len -= 4;
+  }
+
+  uint32_t t = 0;  // collects the last 1-3 bytes; stays 0 when none remain
+
+  switch(len)  // the cases fall through on purpose
+  {
+  case 3: t ^= data[2] << 16;
+  case 2: t ^= data[1] << 8;
+  case 1: t ^= data[0];
+  };
+
+  mmix(h,t);  // the tail word is always mixed, even when it is 0
+  mmix(h,l);
+
+  h ^= h >> 13;
+  h *= m;
+  h ^= h >> 15;
+
+  return h;
+}
+
+//-----------------------------------------------------------------------------
+// CMurmurHash2A, by Austin Appleby
+
+// This is a sample implementation of MurmurHash2A designed to work 
+// incrementally.
+
+// Usage - 
+
+// CMurmurHash2A hasher;
+// hasher.Begin(seed);
+// hasher.Add(data1,size1);
+// hasher.Add(data2,size2);
+// ...
+// hasher.Add(dataN,sizeN);
+// uint32_t hash = hasher.End();
+
+class CMurmurHash2A
+{
+public:
+  // Reset all state so that a new hash can be accumulated from 'seed'.
+  void Begin ( uint32_t seed = 0 )
+  {
+    m_hash  = seed;
+    m_tail  = 0;
+    m_count = 0;
+    m_size  = 0;
+  }
+  // Feed the next 'len' bytes of input into the hash.
+  void Add ( const unsigned char * data, int len )
+  {
+    m_size += len;
+
+    MixTail(data,len);  // first complete any partial word left by an earlier call
+
+    while(len >= 4)
+    {
+      uint32_t k = *(uint32_t*)data;
+
+      mmix(m_hash,k);
+
+      data += 4;
+      len -= 4;
+    }
+
+    MixTail(data,len);  // stash the remaining (fewer than 4) bytes in m_tail
+  }
+  // Mix in the pending tail and the total size, then return the final hash.
+  uint32_t End ( void )
+  {
+    mmix(m_hash,m_tail);  // note: mmix also overwrites its second argument
+    mmix(m_hash,m_size);
+
+    m_hash ^= m_hash >> 13;
+    m_hash *= m;
+    m_hash ^= m_hash >> 15;
+
+    return m_hash;
+  }
+
+private:
+
+  static const uint32_t m = 0x5bd1e995;
+  static const int r = 24;
+  // Moves bytes into m_tail one at a time; 'data' and 'len' are updated in place.
+  void MixTail ( const unsigned char * & data, int & len )
+  {
+    while( len && ((len<4) || m_count) )  // run while a word is half-built or under 4 bytes remain
+    {
+      m_tail |= (*data++) << (m_count * 8);
+
+      m_count++;
+      len--;
+
+      if(m_count == 4)  // a full word has been assembled: mix it and start over
+      {
+        mmix(m_hash,m_tail);
+        m_tail = 0;
+        m_count = 0;
+      }
+    }
+  }
+
+  uint32_t m_hash;   // running hash state
+  uint32_t m_tail;   // bytes not yet forming a complete 32-bit word
+  uint32_t m_count;  // number of bytes currently held in m_tail
+  uint32_t m_size;   // total number of bytes passed to Add
+};
+
+//-----------------------------------------------------------------------------
+// MurmurHashNeutral2, by Austin Appleby
+
+// Same as MurmurHash2, but endian- and alignment-neutral.
+// Half the speed though, alas.
+// Each 32-bit word is assembled from single bytes, lowest byte first.
+uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed )
+{
+  const uint32_t m = 0x5bd1e995;
+  const int r = 24;
+
+  uint32_t h = seed ^ len;
+
+  const unsigned char * data = (const unsigned char *)key;
+
+  while(len >= 4)
+  {
+    uint32_t k;
+
+    k  = data[0];
+    k |= data[1] << 8;
+    k |= data[2] << 16;
+    k |= data[3] << 24;
+
+    k *= m; 
+    k ^= k >> r; 
+    k *= m;
+
+    h *= m;
+    h ^= k;
+
+    data += 4;
+    len -= 4;
+  }
+  
+  switch(len)  // last 1-3 bytes; the cases fall through on purpose
+  {
+  case 3: h ^= data[2] << 16;
+  case 2: h ^= data[1] << 8;
+  case 1: h ^= data[0];
+          h *= m;
+  };
+
+  h ^= h >> 13;
+  h *= m;
+  h ^= h >> 15;
+
+  return h;
+} 
+
+//-----------------------------------------------------------------------------
+// MurmurHashAligned2, by Austin Appleby
+
+// Same algorithm as MurmurHash2, but only does aligned reads - should be safer
+// on certain platforms. 
+
+// Performance will be lower than MurmurHash2
+
+#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }  // 'r' is not a parameter: it comes from the enclosing scope
+
+// MurmurHashAligned2 - when 'key' is not 4-byte aligned, words are rebuilt from aligned loads.
+uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed )
+{
+  const uint32_t m = 0x5bd1e995;
+  const int r = 24;
+
+  const unsigned char * data = (const unsigned char *)key;
+
+  uint32_t h = seed ^ len;
+
+  int align = (uint64_t)data & 3;  // low two address bits: offset from the previous 4-byte boundary
+
+  if(align && (len >= 4))
+  {
+    // Pre-load the temp registers
+    // (the fall-through cases read the 4-align bytes that precede the next boundary)
+    uint32_t t = 0, d = 0;
+
+    switch(align)
+    {
+      case 1: t |= data[2] << 16;
+      case 2: t |= data[1] << 8;
+      case 3: t |= data[0];
+    }
+
+    t <<= (8 * align);  // park those bytes at the top so that 't >> sr' restores them
+
+    data += 4-align;  // data now points at a 4-byte boundary
+    len -= 4-align;
+
+    int sl = 8 * (4-align);
+    int sr = 8 * align;
+
+    // Mix
+    // (each word = bytes carried over in t, plus the low bytes of the next aligned load d)
+    while(len >= 4)
+    {
+      d = *(uint32_t *)data;
+      t = (t >> sr) | (d << sl);
+
+      uint32_t k = t;
+
+      MIX(h,k,m);
+
+      t = d;
+
+      data += 4;
+      len -= 4;
+    }
+
+    // Handle leftover data in temp registers
+
+    d = 0;
+
+    if(len >= align)  // enough bytes remain to complete the word pending in t
+    {
+      switch(align)
+      {
+      case 3: d |= data[2] << 16;
+      case 2: d |= data[1] << 8;
+      case 1: d |= data[0];
+      }
+
+      uint32_t k = (t >> sr) | (d << sl);
+      MIX(h,k,m);
+
+      data += align;
+      len -= align;
+
+      //----------
+      // Handle tail bytes
+
+      switch(len)
+      {
+      case 3: h ^= data[2] << 16;
+      case 2: h ^= data[1] << 8;
+      case 1: h ^= data[0];
+          h *= m;
+      };
+    }
+    else  // the pending bytes plus the remaining bytes are folded in as one partial word
+    {
+      switch(len)
+      {
+      case 3: d |= data[2] << 16;
+      case 2: d |= data[1] << 8;
+      case 1: d |= data[0];
+      case 0: h ^= (t >> sr) | (d << sl);
+          h *= m;
+      }
+    }
+
+    h ^= h >> 13;
+    h *= m;
+    h ^= h >> 15;
+
+    return h;
+  }
+  else  // taken when the pointer is already aligned or fewer than 4 bytes are given
+  {
+    while(len >= 4)
+    {
+      uint32_t k = *(uint32_t *)data;
+
+      MIX(h,k,m);
+
+      data += 4;
+      len -= 4;
+    }
+
+    //----------
+    // Handle tail bytes
+
+    switch(len)
+    {
+    case 3: h ^= data[2] << 16;
+    case 2: h ^= data[1] << 8;
+    case 1: h ^= data[0];
+        h *= m;
+    };
+
+    h ^= h >> 13;
+    h *= m;
+    h ^= h >> 15;
+
+    return h;
+  }
+}
+
+//-----------------------------------------------------------------------------
+
diff --git a/MurmurHash2.h b/MurmurHash2.h
new file mode 100644
index 0000000..32993c2
--- /dev/null
+++ b/MurmurHash2.h
@@ -0,0 +1,39 @@
+//-----------------------------------------------------------------------------
+// MurmurHash2 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+#ifndef _MURMURHASH2_H_
+#define _MURMURHASH2_H_
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+// The fixed-width integer names are declared by hand for this compiler.
+typedef unsigned char uint8_t;
+typedef unsigned long uint32_t;    // NOTE(review): relies on 'unsigned long' being 32 bits here
+typedef unsigned __int64 uint64_t;
+
+// Other compilers - the same names come from <stdint.h>
+
+#else	// defined(_MSC_VER)
+
+#include <stdint.h>
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+// Every variant takes the key pointer, its length in bytes, and a seed.
+uint32_t MurmurHash2        ( const void * key, int len, uint32_t seed );
+uint64_t MurmurHash64A      ( const void * key, int len, uint64_t seed );
+uint64_t MurmurHash64B      ( const void * key, int len, uint64_t seed );
+uint32_t MurmurHash2A       ( const void * key, int len, uint32_t seed );
+uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed );
+uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed );
+
+//-----------------------------------------------------------------------------
+
+#endif // _MURMURHASH2_H_
+
diff --git a/MurmurHash3.cpp b/MurmurHash3.cpp
new file mode 100644
index 0000000..5a55b7a
--- /dev/null
+++ b/MurmurHash3.cpp
@@ -0,0 +1,335 @@
+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+// Note - The x86 and x64 versions do _not_ produce the same results, as the
+// algorithms are optimized for their respective platforms. You can still
+// compile and run any of them on any platform, but your performance with the
+// non-native version will be less than optimal.
+
+#include "MurmurHash3.h"
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+#define FORCE_INLINE	__forceinline
+
+#include <stdlib.h>
+// On this compiler the rotate macros map onto _rotl / _rotl64.
+#define ROTL32(x,y)	_rotl(x,y)
+#define ROTL64(x,y)	_rotl64(x,y)
+
+#define BIG_CONSTANT(x) (x)
+
+// Other compilers
+
+#else	// defined(_MSC_VER)
+
+#define	FORCE_INLINE inline __attribute__((always_inline))
+// Rotate x left by r bits. Every call in this file passes 0 < r < 32.
+inline uint32_t rotl32 ( uint32_t x, int8_t r )
+{
+  return (x << r) | (x >> (32 - r));
+}
+// Rotate x left by r bits. Every call in this file passes 0 < r < 64.
+inline uint64_t rotl64 ( uint64_t x, int8_t r )
+{
+  return (x << r) | (x >> (64 - r));
+}
+
+#define	ROTL32(x,y)	rotl32(x,y)
+#define ROTL64(x,y)	rotl64(x,y)
+
+#define BIG_CONSTANT(x) (x##LLU)
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+// Block read - if your platform needs to do endian-swapping or can only
+// handle aligned reads, do the conversion here
+// As written, both overloads simply return element i of p unchanged.
+FORCE_INLINE uint32_t getblock ( const uint32_t * p, int i )
+{
+  return p[i];
+}
+// 64-bit overload of the same block read.
+FORCE_INLINE uint64_t getblock ( const uint64_t * p, int i )
+{
+  return p[i];
+}
+
+//-----------------------------------------------------------------------------
+// Finalization mix - force all bits of a hash block to avalanche
+// 32-bit version: alternating xor-shift and multiply steps.
+FORCE_INLINE uint32_t fmix ( uint32_t h )
+{
+  h ^= h >> 16;
+  h *= 0x85ebca6b;
+  h ^= h >> 13;
+  h *= 0xc2b2ae35;
+  h ^= h >> 16;
+
+  return h;
+}
+
+//----------
+// 64-bit version: the same step pattern with 64-bit constants and shifts of 33.
+FORCE_INLINE uint64_t fmix ( uint64_t k )
+{
+  k ^= k >> 33;
+  k *= BIG_CONSTANT(0xff51afd7ed558ccd);
+  k ^= k >> 33;
+  k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
+  k ^= k >> 33;
+
+  return k;
+}
+
+//-----------------------------------------------------------------------------
+// MurmurHash3_x86_32 - hash 'len' bytes at 'key'; the 32-bit result is written to 'out'.
+void MurmurHash3_x86_32 ( const void * key, int len,
+                          uint32_t seed, void * out )
+{
+  const uint8_t * data = (const uint8_t*)key;
+  const int nblocks = len / 4;
+
+  uint32_t h1 = seed;
+
+  const uint32_t c1 = 0xcc9e2d51;
+  const uint32_t c2 = 0x1b873593;
+
+  //----------
+  // body
+  // 'blocks' points just past the last whole block, so i runs from -nblocks up to -1
+  const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);
+
+  for(int i = -nblocks; i; i++)
+  {
+    uint32_t k1 = getblock(blocks,i);
+
+    k1 *= c1;
+    k1 = ROTL32(k1,15);
+    k1 *= c2;
+    
+    h1 ^= k1;
+    h1 = ROTL32(h1,13); 
+    h1 = h1*5+0xe6546b64;
+  }
+
+  //----------
+  // tail
+  // (the 1-3 bytes after the last whole block; the cases fall through on purpose)
+  const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
+
+  uint32_t k1 = 0;
+
+  switch(len & 3)
+  {
+  case 3: k1 ^= tail[2] << 16;
+  case 2: k1 ^= tail[1] << 8;
+  case 1: k1 ^= tail[0];
+          k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
+  };
+
+  //----------
+  // finalization
+
+  h1 ^= len;
+
+  h1 = fmix(h1);
+
+  *(uint32_t*)out = h1;  // 'out' must have room for one uint32_t
+} 
+
+//-----------------------------------------------------------------------------
+// MurmurHash3_x86_128 - 128-bit result built from four 32-bit lanes (h1..h4), written to 'out'.
+void MurmurHash3_x86_128 ( const void * key, const int len,
+                           uint32_t seed, void * out )
+{
+  const uint8_t * data = (const uint8_t*)key;
+  const int nblocks = len / 16;
+
+  uint32_t h1 = seed;
+  uint32_t h2 = seed;
+  uint32_t h3 = seed;
+  uint32_t h4 = seed;
+
+  const uint32_t c1 = 0x239b961b; 
+  const uint32_t c2 = 0xab0e9789;
+  const uint32_t c3 = 0x38b34ae5; 
+  const uint32_t c4 = 0xa1e38b93;
+
+  //----------
+  // body
+  // 'blocks' points just past the last whole 16-byte block, so i counts up from -nblocks
+  const uint32_t * blocks = (const uint32_t *)(data + nblocks*16);
+
+  for(int i = -nblocks; i; i++)
+  {
+    uint32_t k1 = getblock(blocks,i*4+0);
+    uint32_t k2 = getblock(blocks,i*4+1);
+    uint32_t k3 = getblock(blocks,i*4+2);
+    uint32_t k4 = getblock(blocks,i*4+3);
+
+    k1 *= c1; k1  = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
+
+    h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;
+
+    k2 *= c2; k2  = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
+
+    h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;
+
+    k3 *= c3; k3  = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
+
+    h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;
+
+    k4 *= c4; k4  = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
+
+    h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;
+  }
+
+  //----------
+  // tail
+  // (up to 15 leftover bytes; every case falls through to the ones below it)
+  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
+
+  uint32_t k1 = 0;
+  uint32_t k2 = 0;
+  uint32_t k3 = 0;
+  uint32_t k4 = 0;
+
+  switch(len & 15)
+  {
+  case 15: k4 ^= tail[14] << 16;
+  case 14: k4 ^= tail[13] << 8;
+  case 13: k4 ^= tail[12] << 0;
+           k4 *= c4; k4  = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
+
+  case 12: k3 ^= tail[11] << 24;
+  case 11: k3 ^= tail[10] << 16;
+  case 10: k3 ^= tail[ 9] << 8;
+  case  9: k3 ^= tail[ 8] << 0;
+           k3 *= c3; k3  = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
+
+  case  8: k2 ^= tail[ 7] << 24;
+  case  7: k2 ^= tail[ 6] << 16;
+  case  6: k2 ^= tail[ 5] << 8;
+  case  5: k2 ^= tail[ 4] << 0;
+           k2 *= c2; k2  = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
+
+  case  4: k1 ^= tail[ 3] << 24;
+  case  3: k1 ^= tail[ 2] << 16;
+  case  2: k1 ^= tail[ 1] << 8;
+  case  1: k1 ^= tail[ 0] << 0;
+           k1 *= c1; k1  = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
+  };
+
+  //----------
+  // finalization
+
+  h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len;
+
+  h1 += h2; h1 += h3; h1 += h4;
+  h2 += h1; h3 += h1; h4 += h1;
+
+  h1 = fmix(h1);
+  h2 = fmix(h2);
+  h3 = fmix(h3);
+  h4 = fmix(h4);
+
+  h1 += h2; h1 += h3; h1 += h4;
+  h2 += h1; h3 += h1; h4 += h1;
+
+  ((uint32_t*)out)[0] = h1;  // 'out' must have room for four uint32_t values
+  ((uint32_t*)out)[1] = h2;
+  ((uint32_t*)out)[2] = h3;
+  ((uint32_t*)out)[3] = h4;
+}
+
+//-----------------------------------------------------------------------------
+// MurmurHash3_x64_128 - 128-bit result built from two 64-bit lanes (h1, h2), written to 'out'.
+void MurmurHash3_x64_128 ( const void * key, const int len,
+                           const uint32_t seed, void * out )
+{
+  const uint8_t * data = (const uint8_t*)key;
+  const int nblocks = len / 16;
+
+  uint64_t h1 = seed;
+  uint64_t h2 = seed;
+
+  const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
+  const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
+
+  //----------
+  // body
+  // (here 'blocks' points at the start of the key and i counts up from 0)
+  const uint64_t * blocks = (const uint64_t *)(data);
+
+  for(int i = 0; i < nblocks; i++)
+  {
+    uint64_t k1 = getblock(blocks,i*2+0);
+    uint64_t k2 = getblock(blocks,i*2+1);
+
+    k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
+
+    h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
+
+    k2 *= c2; k2  = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
+
+    h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
+  }
+
+  //----------
+  // tail
+  // (up to 15 leftover bytes; every case falls through to the ones below it)
+  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
+
+  uint64_t k1 = 0;
+  uint64_t k2 = 0;
+
+  switch(len & 15)
+  {
+  case 15: k2 ^= uint64_t(tail[14]) << 48;
+  case 14: k2 ^= uint64_t(tail[13]) << 40;
+  case 13: k2 ^= uint64_t(tail[12]) << 32;
+  case 12: k2 ^= uint64_t(tail[11]) << 24;
+  case 11: k2 ^= uint64_t(tail[10]) << 16;
+  case 10: k2 ^= uint64_t(tail[ 9]) << 8;
+  case  9: k2 ^= uint64_t(tail[ 8]) << 0;
+           k2 *= c2; k2  = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
+
+  case  8: k1 ^= uint64_t(tail[ 7]) << 56;
+  case  7: k1 ^= uint64_t(tail[ 6]) << 48;
+  case  6: k1 ^= uint64_t(tail[ 5]) << 40;
+  case  5: k1 ^= uint64_t(tail[ 4]) << 32;
+  case  4: k1 ^= uint64_t(tail[ 3]) << 24;
+  case  3: k1 ^= uint64_t(tail[ 2]) << 16;
+  case  2: k1 ^= uint64_t(tail[ 1]) << 8;
+  case  1: k1 ^= uint64_t(tail[ 0]) << 0;
+           k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
+  };
+
+  //----------
+  // finalization
+
+  h1 ^= len; h2 ^= len;
+
+  h1 += h2;
+  h2 += h1;
+
+  h1 = fmix(h1);
+  h2 = fmix(h2);
+
+  h1 += h2;
+  h2 += h1;
+
+  ((uint64_t*)out)[0] = h1;  // 'out' must have room for two uint64_t values
+  ((uint64_t*)out)[1] = h2;
+}
+
+//-----------------------------------------------------------------------------
+
diff --git a/MurmurHash3.h b/MurmurHash3.h
new file mode 100644
index 0000000..54e9d3f
--- /dev/null
+++ b/MurmurHash3.h
@@ -0,0 +1,37 @@
+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+#ifndef _MURMURHASH3_H_
+#define _MURMURHASH3_H_
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+// The fixed-width integer names are declared by hand for this compiler.
+typedef unsigned char uint8_t;
+typedef unsigned long uint32_t;    // NOTE(review): relies on 'unsigned long' being 32 bits here
+typedef unsigned __int64 uint64_t;
+
+// Other compilers - the same names come from <stdint.h>
+
+#else	// defined(_MSC_VER)
+
+#include <stdint.h>
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+// Each function hands its result back through 'out' instead of a return value.
+void MurmurHash3_x86_32  ( const void * key, int len, uint32_t seed, void * out );
+// The two _128 variants share the same parameter list.
+void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out );
+
+void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );
+
+//-----------------------------------------------------------------------------
+
+#endif // _MURMURHASH3_H_
diff --git a/PMurHash.c b/PMurHash.c
new file mode 100644
index 0000000..0175012
--- /dev/null
+++ b/PMurHash.c
@@ -0,0 +1,317 @@
+/*-----------------------------------------------------------------------------
+ * MurmurHash3 was written by Austin Appleby, and is placed in the public
+ * domain.
+ *
+ * This implementation was written by Shane Day, and is also public domain.
+ *
+ * This is a portable ANSI C implementation of MurmurHash3_x86_32 (Murmur3A)
+ * with support for progressive processing.
+ */
+
+/*-----------------------------------------------------------------------------
+ 
+If you want to understand the MurmurHash algorithm you would be much better
+off reading the original source. Just point your browser at:
+http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
+
+
+What does this version provide?
+
+1. Progressive data feeding. Useful when the entire payload to be hashed
+does not fit in memory or when the data is streamed through the application.
+Also useful when hashing a number of strings with a common prefix. A partial
+hash of a prefix string can be generated and reused for each suffix string.
+
+2. Portability. Plain old C so that it should compile on any old compiler.
+Both CPU endian and access-alignment neutral, but avoiding inefficient code
+when possible depending on CPU capabilities.
+
+3. Drop in. I personally like nice self contained public domain code, making it
+easy to pilfer without loads of refactoring to work properly in the existing
+application code & makefile structure and mucking around with licence files.
+Just copy PMurHash.h and PMurHash.c and you're ready to go.
+
+
+How does it work?
+
+We can only process entire 32 bit chunks of input, except for the very end
+that may be shorter. So along with the partial hash we need to give back to
+the caller a carry containing up to 3 bytes that we were unable to process.
+This carry also needs to record the number of bytes the carry holds. I use
+the low 2 bits as a count (0..3) and the carry bytes are shifted into the
+high byte in stream order.
+
+To handle endianness I simply use a macro that reads a uint32_t and define
+that macro to be a direct read on little endian machines, a read and swap
+on big endian machines, or a byte-by-byte read if the endianness is unknown.
+
+-----------------------------------------------------------------------------*/
+
+
+#include "PMurHash.h"
+
+/* I used ugly type names in the header to avoid potential conflicts with
+ * application or system typedefs & defines. The names are remapped here so the code
+ * reads like C99. NOTE(review): the MSVC ROTL32 branch below still includes <stdlib.h>. */
+#undef uint32_t
+#define uint32_t MH_UINT32  /* from here on uint32_t expands to MH_UINT32 */
+#undef uint8_t
+#define uint8_t  MH_UINT8   /* likewise uint8_t expands to MH_UINT8 */
+
+/* MSVC warnings we choose to ignore */
+#if defined(_MSC_VER)
+  #pragma warning(disable: 4127) /* conditional expression is constant */
+#endif
+
+/*-----------------------------------------------------------------------------
+ * Endianness, misalignment capabilities and util macros
+ *
+ * The following 3 macros are defined in this section. The other macros defined
+ * are only needed to help derive these 3.
+ *
+ * READ_UINT32(x)   Read a little endian unsigned 32-bit int
+ * UNALIGNED_SAFE   Defined if READ_UINT32 works on non-word boundaries
+ * ROTL32(x,r)      Rotate x left by r bits
+ */
+
+/* Convention is to define __BYTE_ORDER == to one of these values */
+#if !defined(__BIG_ENDIAN)
+  #define __BIG_ENDIAN 4321
+#endif
+#if !defined(__LITTLE_ENDIAN)
+  #define __LITTLE_ENDIAN 1234
+#endif
+
+/* I386 */
+#if defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(i386)
+  #define __BYTE_ORDER __LITTLE_ENDIAN
+  #define UNALIGNED_SAFE
+#endif
+
+/* gcc 'may' define __LITTLE_ENDIAN__ or __BIG_ENDIAN__ to 1 (Note the trailing __),
+ * or even _LITTLE_ENDIAN or _BIG_ENDIAN (Note the single _ prefix) */
+#if !defined(__BYTE_ORDER)
+  #if defined(__LITTLE_ENDIAN__) && __LITTLE_ENDIAN__==1 || defined(_LITTLE_ENDIAN) && _LITTLE_ENDIAN==1
+    #define __BYTE_ORDER __LITTLE_ENDIAN
+  #elif defined(__BIG_ENDIAN__) && __BIG_ENDIAN__==1 || defined(_BIG_ENDIAN) && _BIG_ENDIAN==1
+    #define __BYTE_ORDER __BIG_ENDIAN
+  #endif
+#endif
+
+/* gcc (usually) defines xEL/EB macros for ARM and MIPS endianness */
+#if !defined(__BYTE_ORDER)
+  #if defined(__ARMEL__) || defined(__MIPSEL__)
+    #define __BYTE_ORDER __LITTLE_ENDIAN
+  #endif
+  #if defined(__ARMEB__) || defined(__MIPSEB__)
+    #define __BYTE_ORDER __BIG_ENDIAN
+  #endif
+#endif
+/* If __BYTE_ORDER is still undefined it evaluates as 0 below, selecting the byte-wise fallback */
+/* Now find best way we can to READ_UINT32 */
+#if __BYTE_ORDER==__LITTLE_ENDIAN
+  /* CPU endian matches murmurhash algorithm, so read 32-bit word directly */
+  #define READ_UINT32(ptr)   (*((uint32_t*)(ptr)))
+#elif __BYTE_ORDER==__BIG_ENDIAN
+  /* TODO: Add additional cases below where a compiler provided bswap32 is available */
+  #if defined(__GNUC__) && (__GNUC__>4 || (__GNUC__==4 && __GNUC_MINOR__>=3))
+    #define READ_UINT32(ptr)   (__builtin_bswap32(*((uint32_t*)(ptr))))
+  #else
+    /* Without a known fast bswap32 we're just as well off doing this */
+    #define READ_UINT32(ptr)   (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24)
+    #define UNALIGNED_SAFE
+  #endif
+#else
+  /* Unknown endianness so last resort is to read individual bytes */
+  #define READ_UINT32(ptr)   (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24)
+
+  /* Since we're not doing word-reads we can skip the messing about with realignment */
+  #define UNALIGNED_SAFE
+#endif
+
+/* Find best way to ROTL32 */
+#if defined(_MSC_VER)
+  #include <stdlib.h>  /* Microsoft put _rotl declaration in here */
+  #define ROTL32(x,r)  _rotl(x,r)
+#else
+  /* gcc recognises this code and generates a rotate instruction for CPUs with one */
+  #define ROTL32(x,r)  (((uint32_t)x << r) | ((uint32_t)x >> (32 - r)))
+#endif  /* NOTE: the fallback expands x and r twice and does not parenthesise them */
+
+
+/*-----------------------------------------------------------------------------
+ * Core murmurhash algorithm macros */
+
+#define C1  (0xcc9e2d51)  /* multiplier applied to a block before its rotate */
+#define C2  (0x1b873593)  /* multiplier applied to a block after its rotate */
+
+/* This is the main processing body of the algorithm. It mixes one full
+ * 32-bit block k1 into the running hash h1; both arguments are modified. */
+#define DOBLOCK(h1, k1) do{ \
+        k1 *= C1; \
+        k1 = ROTL32(k1,15); \
+        k1 *= C2; \
+        \
+        h1 ^= k1; \
+        h1 = ROTL32(h1,13); \
+        h1 = h1*5+0xe6546b64; \
+    }while(0)
+
+/* DOBYTES: each new byte enters at the top of c while older bytes shift down by 8 bits */
+/* Append unaligned bytes to carry, forcing hash churn if we have 4 bytes */
+/* cnt=bytes to process, h1=name of h1 var, c=carry, n=bytes in c, ptr/len=payload */
+#define DOBYTES(cnt, h1, c, n, ptr, len) do{ \
+    int _i = cnt; \
+    while(_i--) { \
+        c = c>>8 | *ptr++<<24; \
+        n++; len--; \
+        if(n==4) { \
+            DOBLOCK(h1, c); \
+            n = 0; \
+        } \
+    } }while(0)
+
+/*---------------------------------------------------------------------------*/
+
+/* Main hashing function: feeds len bytes at key into *ph1 and *pcarry, which are both
+ * read and written back. Start with carry 0 and h1 0 (or a seed) for a new hash. */
+void PMurHash32_Process(uint32_t *ph1, uint32_t *pcarry, const void *key, int len)
+{
+  uint32_t h1 = *ph1;
+  uint32_t c = *pcarry;
+  /* ptr walks the input; end marks where the whole-word loop stops */
+  const uint8_t *ptr = (uint8_t*)key;
+  const uint8_t *end;
+
+  /* Extract carry count from low 2 bits of c value */
+  int n = c & 3;
+
+#if defined(UNALIGNED_SAFE)
+  /* This CPU handles unaligned word access */
+
+  /* Consume any carry bytes */
+  int i = (4-n) & 3;
+  if(i && i <= len) {
+    DOBYTES(i, h1, c, n, ptr, len);
+  }
+
+  /* Process 32-bit chunks */
+  end = ptr + len/4*4;
+  for( ; ptr < end ; ptr+=4) {
+    uint32_t k1 = READ_UINT32(ptr);
+    DOBLOCK(h1, k1);
+  }
+
+#else /*UNALIGNED_SAFE*/
+  /* This CPU does not handle unaligned word access */
+
+  /* Consume enough so that the next data byte is word aligned */
+  int i = -(long)ptr & 3;
+  if(i && i <= len) {
+      DOBYTES(i, h1, c, n, ptr, len);
+  }
+
+  /* We're now aligned. Process in aligned blocks. Specialise for each possible carry count */
+  end = ptr + len/4*4;
+  switch(n) { /* how many bytes in c */
+  case 0: /* c=[----]  w=[3210]  b=[3210]=w            c'=[----] */
+    for( ; ptr < end ; ptr+=4) {
+      uint32_t k1 = READ_UINT32(ptr);
+      DOBLOCK(h1, k1);
+    }
+    break;
+  case 1: /* c=[0---]  w=[4321]  b=[3210]=c>>24|w<<8   c'=[4---] */
+    for( ; ptr < end ; ptr+=4) {
+      uint32_t k1 = c>>24;
+      c = READ_UINT32(ptr);
+      k1 |= c<<8;
+      DOBLOCK(h1, k1);
+    }
+    break;
+  case 2: /* c=[10--]  w=[5432]  b=[3210]=c>>16|w<<16  c'=[54--] */
+    for( ; ptr < end ; ptr+=4) {
+      uint32_t k1 = c>>16;
+      c = READ_UINT32(ptr);
+      k1 |= c<<16;
+      DOBLOCK(h1, k1);
+    }
+    break;
+  case 3: /* c=[210-]  w=[6543]  b=[3210]=c>>8|w<<24   c'=[654-] */
+    for( ; ptr < end ; ptr+=4) {
+      uint32_t k1 = c>>8;
+      c = READ_UINT32(ptr);
+      k1 |= c<<24;
+      DOBLOCK(h1, k1);
+    }
+  }
+#endif /*UNALIGNED_SAFE*/
+
+  /* Advance over whole 32-bit chunks, possibly leaving 1..3 bytes */
+  len -= len/4*4;
+
+  /* Append any remaining bytes into carry */
+  DOBYTES(len, h1, c, n, ptr, len);
+
+  /* Copy out new running hash and carry */
+  *ph1 = h1;
+  *pcarry = (c & ~0xff) | n;  /* low byte of the carry is replaced by the byte count n */
+} 
+
+/*---------------------------------------------------------------------------*/
+
+/* Finalize a hash. To match the original Murmur3A the total_length must be provided */
+uint32_t PMurHash32_Result(uint32_t h, uint32_t carry, uint32_t total_length)
+{
+  uint32_t k1;
+  int n = carry & 3;  /* number of bytes still held in carry */
+  if(n) {
+    k1 = carry >> (4-n)*8;  /* shift the held bytes down to the low end */
+    k1 *= C1; k1 = ROTL32(k1,15); k1 *= C2; h ^= k1;
+  }
+  h ^= total_length;
+
+  /* fmix */
+  h ^= h >> 16;
+  h *= 0x85ebca6b;
+  h ^= h >> 13;
+  h *= 0xc2b2ae35;
+  h ^= h >> 16;
+
+  return h;
+}
+
+/*---------------------------------------------------------------------------*/
+
+/* Murmur3A compatible all-at-once */
+uint32_t PMurHash32(uint32_t seed, const void *key, int len)
+{
+  uint32_t h1=seed, carry=0;  /* fresh state: seed as the hash, empty carry */
+  PMurHash32_Process(&h1, &carry, key, len);
+  return PMurHash32_Result(h1, carry, len);
+}
+
+/*---------------------------------------------------------------------------*/
+
+/* Provide an API suitable for smhasher */
+void PMurHash32_test(const void *key, int len, uint32_t seed, void *out)
+{
+  uint32_t h1=seed, carry=0;
+  const uint8_t *ptr = (uint8_t*)key;
+  const uint8_t *end = ptr + len;
+  /* The disabled branch below feeds the key in pieces instead of in one call */
+#if 0 /* Exercise the progressive processing */
+  while(ptr < end) {
+    //const uint8_t *mid = ptr + rand()%(end-ptr)+1;
+    const uint8_t *mid = ptr + (rand()&0xF);
+    mid = mid<end?mid:end;
+    PMurHash32_Process(&h1, &carry, ptr, mid-ptr);
+    ptr = mid;
+  }
+#else
+  PMurHash32_Process(&h1, &carry, ptr, (int)(end-ptr));
+#endif
+  h1 = PMurHash32_Result(h1, carry, len);
+  *(uint32_t*)out = h1;  /* result is written through out as one 32-bit value */
+}
+
+/*---------------------------------------------------------------------------*/
diff --git a/PMurHash.h b/PMurHash.h
new file mode 100644
index 0000000..28ead00
--- /dev/null
+++ b/PMurHash.h
@@ -0,0 +1,64 @@
+/*-----------------------------------------------------------------------------
+ * MurmurHash3 was written by Austin Appleby, and is placed in the public
+ * domain.
+ *
+ * This implementation was written by Shane Day, and is also public domain.
+ *
+ * This is a portable ANSI C implementation of MurmurHash3_x86_32 (Murmur3A)
+ * with support for progressive processing.
+ */
+
+/* ------------------------------------------------------------------------- */
+/* Determine what native type to use for uint32_t */
+
+/* We can't use the name 'uint32_t' here because it will conflict with
+ * any version provided by the system headers or application. */
+
+/* First look for special cases */
+#if defined(_MSC_VER)
+  #define MH_UINT32 unsigned long
+#endif
+
+/* If the compiler says it's C99 then take its word for it */
+#if !defined(MH_UINT32) && ( \
+     defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L )
+  #include <stdint.h>
+  #define MH_UINT32 uint32_t
+#endif
+
+/* Otherwise try testing against max value macros from limits.h */
+#if !defined(MH_UINT32)
+  #include  <limits.h>
+  #if   (USHRT_MAX == 0xffffffffUL)
+    #define MH_UINT32 unsigned short
+  #elif (UINT_MAX == 0xffffffffUL)
+    #define MH_UINT32 unsigned int
+  #elif (ULONG_MAX == 0xffffffffUL)
+    #define MH_UINT32 unsigned long
+  #endif
+#endif
+
+#if !defined(MH_UINT32)
+  #error Unable to determine type name for unsigned 32-bit int
+#endif
+
+/* I'm yet to work on a platform where 'unsigned char' is not 8 bits */
+#define MH_UINT8  unsigned char
+
+
+/* ------------------------------------------------------------------------- */
+/* Prototypes */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Feed len bytes at key into the running hash *ph1 and the carry *pcarry;
+ * both are updated in place. */
+void PMurHash32_Process(MH_UINT32 *ph1, MH_UINT32 *pcarry, const void *key, int len);
+/* Finalize a hash from the running value and carry. total_length must be the
+ * total number of bytes processed to match the original Murmur3A. */
+MH_UINT32 PMurHash32_Result(MH_UINT32 h1, MH_UINT32 carry, MH_UINT32 total_length);
+/* Murmur3A-compatible all-at-once hash of len bytes at key. */
+MH_UINT32 PMurHash32(MH_UINT32 seed, const void *key, int len);
+
+/* smhasher-style wrapper: writes the 32-bit hash of key through out. */
+void PMurHash32_test(const void *key, int len, MH_UINT32 seed, void *out);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/Platform.cpp b/Platform.cpp
new file mode 100644
index 0000000..3c97694
--- /dev/null
+++ b/Platform.cpp
@@ -0,0 +1,42 @@
+#include "Platform.h"
+
+#include <stdio.h>
+
+// Reads the cycle counter once through rdtsc() and prints the reading.
+void testRDTSC ( void )
+{
+  int64_t temp = rdtsc();
+
+  // Print the whole 64-bit value; casting to int threw away the upper bits.
+  printf("%lld",(long long)temp);
+}
+
+#if defined(_MSC_VER)
+
+#include <windows.h>
+
+// Applies 'cpu' as the affinity mask of the current process and raises the
+// current thread to THREAD_PRIORITY_HIGHEST.
+void SetAffinity ( int cpu )
+{
+  HANDLE process = GetCurrentProcess();
+  HANDLE thread  = GetCurrentThread();
+
+  SetProcessAffinityMask(process,cpu);
+  SetThreadPriority(thread, THREAD_PRIORITY_HIGHEST);
+}
+
+#else
+
+#include <sched.h>
+
+// Requests affinity to CPU 2 for the caller (pid 0); the argument is ignored.
+// Compiles to a no-op on Cygwin and Apple platforms. A failed request only
+// prints a warning.
+void SetAffinity ( int /*cpu*/ )
+{
+#if !defined(__CYGWIN__) && !defined(__APPLE__)
+  cpu_set_t cpus;
+
+  CPU_ZERO(&cpus);
+  CPU_SET(2,&cpus);
+
+  const int rc = sched_setaffinity(0,sizeof(cpus),&cpus);
+
+  if(rc == -1)
+  {
+    printf("WARNING: Could not set CPU affinity\n");
+  }
+#endif
+}
+
+#endif
diff --git a/Platform.h b/Platform.h
new file mode 100644
index 0000000..6d0f0df
--- /dev/null
+++ b/Platform.h
@@ -0,0 +1,94 @@
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+#pragma once
+
+void SetAffinity ( int cpu );
+
+//-----------------------------------------------------------------------------
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+#define FORCE_INLINE	__forceinline
+#define	NEVER_INLINE  __declspec(noinline)
+
+#include <stdlib.h>
+#include <math.h>   // Has to be included before intrin.h or VC complains about 'ceil'
+#include <intrin.h> // for __rdtsc
+#include "pstdint.h"
+
+#define ROTL32(x,y)	_rotl(x,y)
+#define ROTL64(x,y)	_rotl64(x,y)
+#define ROTR32(x,y)	_rotr(x,y)
+#define ROTR64(x,y)	_rotr64(x,y)
+
+#pragma warning(disable : 4127) // "conditional expression is constant" in the if()s for avalanchetest
+#pragma warning(disable : 4100)
+#pragma warning(disable : 4702)
+
+#define BIG_CONSTANT(x) (x)
+
+// RDTSC == Read Time Stamp Counter
+
+#define rdtsc() __rdtsc()
+
+//-----------------------------------------------------------------------------
+// Other compilers
+
+#else	//	defined(_MSC_VER)
+
+#include <stdint.h>
+
+#define	FORCE_INLINE inline __attribute__((always_inline))
+#define	NEVER_INLINE __attribute__((noinline))
+
+// Rotates x left by r bits. The count is reduced modulo 32 so that r == 0
+// never turns into an undefined full-width shift.
+inline uint32_t rotl32 ( uint32_t x, int8_t r )
+{
+  const unsigned int n = (unsigned int)r & 31u;
+
+  return (x << n) | (x >> ((32u - n) & 31u));
+}
+
+// Rotates x left by r bits. The count is reduced modulo 64 so that r == 0
+// never turns into an undefined full-width shift.
+inline uint64_t rotl64 ( uint64_t x, int8_t r )
+{
+  const unsigned int n = (unsigned int)r & 63u;
+
+  return (x << n) | (x >> ((64u - n) & 63u));
+}
+
+// Rotates x right by r bits. The count is reduced modulo 32 so that r == 0
+// never turns into an undefined full-width shift.
+inline uint32_t rotr32 ( uint32_t x, int8_t r )
+{
+  const unsigned int n = (unsigned int)r & 31u;
+
+  return (x >> n) | (x << ((32u - n) & 31u));
+}
+
+// Rotates x right by r bits. The count is reduced modulo 64 so that r == 0
+// never turns into an undefined full-width shift.
+inline uint64_t rotr64 ( uint64_t x, int8_t r )
+{
+  const unsigned int n = (unsigned int)r & 63u;
+
+  return (x >> n) | (x << ((64u - n) & 63u));
+}
+
+#define	ROTL32(x,y)	rotl32(x,y)
+#define ROTL64(x,y)	rotl64(x,y)
+#define	ROTR32(x,y)	rotr32(x,y)
+#define ROTR64(x,y)	rotr64(x,y)
+
+#define BIG_CONSTANT(x) (x##LLU)
+
+// Returns the time stamp counter read with the RDTSC instruction on x86-64
+// and i386 builds. On any other architecture it defines NO_CYCLE_COUNTER and
+// returns 0.
+__inline__ unsigned long long int rdtsc()
+{
+#ifdef __x86_64__
+    unsigned int a, d;
+    __asm__ volatile ("rdtsc" : "=a" (a), "=d" (d));
+    // Widen to 64 bits before shifting: 'unsigned long' is only 32 bits wide
+    // on LLP64 targets, where shifting it by 32 would lose the high half.
+    return (unsigned long long)a | ((unsigned long long)d << 32);
+#elif defined(__i386__)
+    unsigned long long int x;
+    __asm__ volatile ("rdtsc" : "=A" (x));
+    return x;
+#else
+#define NO_CYCLE_COUNTER
+    return 0;
+#endif
+}
+
+#include <strings.h>
+#define _stricmp strcasecmp
+
+#endif	//	!defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
diff --git a/Random.cpp b/Random.cpp
new file mode 100644
index 0000000..87de595
--- /dev/null
+++ b/Random.cpp
@@ -0,0 +1,8 @@
+#include "Random.h"
+
+// Global generator instances, constructed with the seeds 1 to 4.
+Rand g_rand1(1);
+Rand g_rand2(2);
+Rand g_rand3(3);
+Rand g_rand4(4);
+
+//-----------------------------------------------------------------------------
diff --git a/Random.h b/Random.h
new file mode 100644
index 0000000..7e0df3f
--- /dev/null
+++ b/Random.h
@@ -0,0 +1,117 @@
+#pragma once
+
+#include "Types.h"
+
+#include <string.h> // for memcpy
+
+//-----------------------------------------------------------------------------
+// Xorshift RNG based on code by George Marsaglia
+// http://en.wikipedia.org/wiki/Xorshift
+
+// Pseudo random number generator with four 32-bit state words, advanced by
+// the xorshift step in mix().
+struct Rand
+{
+  uint32_t x;
+  uint32_t y;
+  uint32_t z;
+  uint32_t w;
+
+  // Default construction seeds the generator with 0.
+  Rand()
+  {
+    reseed(uint32_t(0));
+  }
+
+  Rand( uint32_t seed )
+  {
+    reseed(seed);
+  }
+
+  // Re-initializes the state from a 32-bit seed, then runs 10 warm-up steps.
+  void reseed ( uint32_t seed )
+  {
+    x = 0x498b3bc5 ^ seed;
+    y = 0;
+    z = 0;
+    w = 0;
+
+    for(int i = 0; i < 10; i++) mix();
+  }
+
+  // Re-initializes the state from a 64-bit seed (low half into x, high half
+  // into y), then runs 10 warm-up steps.
+  void reseed ( uint64_t seed )
+  {
+    x = 0x498b3bc5 ^ (uint32_t)(seed >>  0);
+    y = 0x5a05089a ^ (uint32_t)(seed >> 32);
+    z = 0;
+    w = 0;
+
+    for(int i = 0; i < 10; i++) mix();
+  }
+
+  //-----------------------------------------------------------------------------
+
+  // Advances the state by one step.
+  void mix ( void )
+  {
+    uint32_t t = x ^ (x << 11);
+    x = y; y = z; z = w;
+    w = w ^ (w >> 19) ^ t ^ (t >> 8);
+  }
+
+  // Advances the state once and returns the new x word.
+  uint32_t rand_u32 ( void )
+  {
+    mix();
+
+    return x;
+  }
+
+  // Advances the state once and returns x (high half) combined with y (low half).
+  uint64_t rand_u64 ( void )
+  {
+    mix();
+
+    uint64_t a = x;
+    uint64_t b = y;
+
+    return (a << 32) | b;
+  }
+
+  // Fills 'bytes' bytes at 'blob' with random data: one rand_u32() per whole
+  // 4-byte chunk, then the low byte of one rand_u32() per leftover byte.
+  void rand_p ( void * blob, int bytes )
+  {
+    uint8_t * out = static_cast<uint8_t*>(blob);
+
+    // Copy each word with memcpy: 'blob' is not guaranteed to be 4-byte
+    // aligned, so storing through a uint32_t* would be undefined behaviour.
+    while(bytes >= 4)
+    {
+      const uint32_t word = rand_u32();
+      memcpy(out,&word,sizeof(word));
+      out += 4;
+      bytes -= 4;
+    }
+
+    for(int i = 0; i < bytes; i++)
+    {
+      out[i] = (uint8_t)rand_u32();
+    }
+  }
+};
+
+//-----------------------------------------------------------------------------
+
+extern Rand g_rand1;
+
+// Draws the next 32-bit value from the shared generator g_rand1.
+inline uint32_t rand_u32 ( void )
+{
+  return g_rand1.rand_u32();
+}
+// Draws the next 64-bit value from the shared generator g_rand1.
+inline uint64_t rand_u64 ( void )
+{
+  return g_rand1.rand_u64();
+}
+
+// Fills 'bytes' bytes at 'blob' with values from the global rand_u32(): one
+// call per whole 4-byte chunk, then the low byte of one call per leftover byte.
+inline void rand_p ( void * blob, int bytes )
+{
+  uint8_t * out = (uint8_t*)blob;
+
+  // Copy each word with memcpy: 'blob' is not guaranteed to be 4-byte
+  // aligned, so storing through a uint32_t* would be undefined behaviour.
+  while(bytes >= 4)
+  {
+    const uint32_t word = rand_u32();
+    memcpy(out,&word,sizeof(word));
+    out += 4;
+    bytes -= 4;
+  }
+
+  for(int i = 0; i < bytes; i++)
+  {
+    out[i] = (uint8_t)rand_u32();
+  }
+}
+
+//-----------------------------------------------------------------------------
diff --git a/SpeedTest.cpp b/SpeedTest.cpp
new file mode 100644
index 0000000..d91f6e4
--- /dev/null
+++ b/SpeedTest.cpp
@@ -0,0 +1,242 @@
+#include "SpeedTest.h"
+
+#include "Random.h"
+
+#include <stdio.h>   // for printf
+#include <memory.h>  // for memset
+#include <math.h>    // for sqrt
+#include <algorithm> // for sort
+
+//-----------------------------------------------------------------------------
+// We view our timing values as a series of random variables V that has been
+// contaminated with occasional outliers due to cache misses, thread
+// preemption, etcetera. To filter out the outliers, we search for the largest
+// subset of V such that all its values are within three standard deviations
+// of the mean.
+
+// Arithmetic mean of every element of v (the sum divided by v.size()).
+double CalcMean ( std::vector<double> & v )
+{
+  double total = 0;
+
+  for(std::vector<double>::const_iterator it = v.begin(); it != v.end(); ++it)
+  {
+    total += *it;
+  }
+
+  return total / double(v.size());
+}
+
+// Arithmetic mean of the inclusive index range v[a..b].
+double CalcMean ( std::vector<double> & v, int a, int b )
+{
+  const int count = b-a+1;
+  double total = 0;
+
+  int idx = a;
+  while(idx <= b)
+  {
+    total += v[idx];
+    idx++;
+  }
+
+  return total / count;
+}
+
+// Standard deviation of the inclusive index range v[a..b]: squared deviations
+// from CalcMean(v,a,b) are averaged over b-a+1 samples before the square root.
+double CalcStdv ( std::vector<double> & v, int a, int b )
+{
+  const double center = CalcMean(v,a,b);
+  double sumSquares = 0;
+
+  for(int idx = a; idx <= b; ++idx)
+  {
+    const double delta = v[idx] - center;
+    sumSquares += delta*delta;
+  }
+
+  return sqrt(sumSquares / (b-a+1));
+}
+
+// Reports whether the last element of the prefix v[0,len) lies more than
+// three standard deviations above the prefix mean. With v sorted ascending,
+// that last element is the largest value of the prefix.
+
+bool ContainsOutlier ( std::vector<double> & v, size_t len )
+{
+  const double count = double(len);
+
+  double total = 0;
+  for(size_t i = 0; i < len; i++) total += v[i];
+  const double mean = total / count;
+
+  double sumSquares = 0;
+  for(size_t i = 0; i < len; i++)
+  {
+    const double delta = v[i] - mean;
+    sumSquares += delta*delta;
+  }
+  const double stdv = sqrt(sumSquares / count);
+
+  return v[len-1] > mean + stdv*3;
+}
+
+// Sorts v ascending, then does a bitwise binary search for the longest prefix
+// for which ContainsOutlier() is false and truncates v to that prefix.
+
+void FilterOutliers ( std::vector<double> & v )
+{
+  std::sort(v.begin(),v.end());
+
+  size_t len = 0;
+
+  for(size_t x = 0x40000000; x; x = x >> 1 )
+  {
+    // A candidate length equal to v.size() is the whole vector and is valid;
+    // only lengths past the end are skipped. (Skipping on >= always threw
+    // away the largest sample and emptied a one-element vector.)
+    if((len | x) > v.size()) continue;
+
+    if(!ContainsOutlier(v,len | x))
+    {
+      len |= x;
+    }
+  }
+
+  v.resize(len);
+}
+
+// Alternative filter: sorts v, then for 10 passes trims values more than three
+// standard deviations from the mean of the current window off both ends, and
+// finally replaces v with that window. The original author was not sure this
+// works correctly in all cases.
+
+void FilterOutliers2 ( std::vector<double> & v )
+{
+  std::sort(v.begin(),v.end());
+
+  int lo = 0;
+  int hi = (int)(v.size() - 1);
+
+  for(int pass = 0; pass < 10; pass++)
+  {
+    const double mean = CalcMean(v,lo,hi);
+    const double stdv = CalcStdv(v,lo,hi);
+
+    const double lowCut  = mean - stdv*3;
+    const double highCut = mean + stdv*3;
+
+    while((lo < hi) && (v[lo] < lowCut)) lo++;
+    while((hi > lo) && (v[hi] > highCut)) hi--;
+  }
+
+  // Keep only the surviving window v[lo..hi]
+  std::vector<double> kept(v.begin()+lo,v.begin()+hi+1);
+
+  v.swap(kept);
+}
+
+//-----------------------------------------------------------------------------
+// We really want the rdtsc() calls to bracket the function call as tightly
+// as possible, but that's hard to do portably. We'll try and get as close as
+// possible by marking the function as NEVER_INLINE (to keep the optimizer from
+// moving it) and marking the timing variables as volatile.
+//
+// Returns the rdtsc() difference measured around a single call of
+// hash(key,len,seed,...); the hash output goes to a local scratch buffer.
+
+NEVER_INLINE int64_t timehash ( pfHash hash, const void * key, int len, int seed )
+{
+  // The 'register' storage class was dropped here: it was deprecated in
+  // C++11 and removed in C++17, where it no longer compiles.
+  volatile int64_t begin,end;
+
+  uint32_t temp[16];
+
+  begin = rdtsc();
+
+  hash(key,len,seed,temp);
+
+  end = rdtsc();
+
+  return end-begin;
+}
+
+//-----------------------------------------------------------------------------
+
+// Times 'trials' calls of 'hash' on a block of 'blocksize' bytes that is
+// re-randomized before every call, and returns the mean of the timings left
+// after FilterOutliers(). The block starts 'align' bytes past a 256-byte
+// boundary inside an over-sized allocation.
+double SpeedTest ( pfHash hash, uint32_t seed, const int trials, const int blocksize, const int align )
+{
+  Rand rng(seed);
+
+  uint8_t * raw = new uint8_t[blocksize + 512];
+
+  // Round the allocation up to a 256-byte boundary, then add the offset
+  uint64_t addr = reinterpret_cast<uint64_t>(raw);
+  addr = ((addr + 255) & BIG_CONSTANT(0xFFFFFFFFFFFFFF00)) + align;
+
+  uint8_t * block = reinterpret_cast<uint8_t*>(addr);
+
+  rng.rand_p(block,blocksize);
+
+  //----------
+
+  std::vector<double> times;
+  times.reserve(trials);
+
+  int itrial = 0;
+  while(itrial < trials)
+  {
+    rng.rand_p(block,blocksize);
+
+    const double t = (double)timehash(hash,block,blocksize,itrial);
+
+    // Non-positive timings are discarded
+    if(t > 0) times.push_back(t);
+
+    itrial++;
+  }
+
+  //----------
+
+  std::sort(times.begin(),times.end());
+
+  FilterOutliers(times);
+
+  delete [] raw;
+
+  return CalcMean(times);
+}
+
+//-----------------------------------------------------------------------------
+// 256k blocks seem to give the best results.
+//
+// Prints the throughput of 'hash' on 256 KiB keys for start alignments 0..7.
+
+void BulkSpeedTest ( pfHash hash, uint32_t seed )
+{
+  const int trials = 2999;
+  const int blocksize = 256 * 1024;
+
+  printf("Bulk speed test - %d-byte keys\n",blocksize);
+
+  int align = 0;
+  while(align < 8)
+  {
+    const double cycles = SpeedTest(hash,seed,trials,blocksize,align);
+    const double bytesPerCycle = double(blocksize)/cycles;
+    const double mibPerSecond = (bytesPerCycle * 3000000000.0 / 1048576.0);
+
+    printf("Alignment %2d - %6.3f bytes/cycle - %7.2f MiB/sec @ 3 ghz\n",align,bytesPerCycle,mibPerSecond);
+
+    align++;
+  }
+}
+
+//-----------------------------------------------------------------------------
+
+// Measures 'hash' on keys of 'keysize' bytes (alignment 0), prints the mean
+// cycles per hash and also stores it in 'outCycles'. The "Small key speed
+// test" prefix is printed only when 'verbose' is set; 'hashsize' is not used.
+void TinySpeedTest ( pfHash hash, int hashsize, int keysize, uint32_t seed, bool verbose, double & outCycles )
+{
+  const int trials = 999999;
+
+  if(verbose) printf("Small key speed test - %4d-byte keys - ",keysize);
+
+  double cycles = SpeedTest(hash,seed,trials,keysize,0);
+
+  printf("%8.2f cycles/hash\n",cycles);
+
+  // Hand the measurement back; the output parameter used to be left untouched.
+  outCycles = cycles;
+}
+
+//-----------------------------------------------------------------------------
diff --git a/SpeedTest.h b/SpeedTest.h
new file mode 100644
index 0000000..7bd2167
--- /dev/null
+++ b/SpeedTest.h
@@ -0,0 +1,8 @@
+#pragma once
+
+#include "Types.h"
+
+// Prints a throughput report for 'hash' on large keys, one line per alignment.
+void BulkSpeedTest ( pfHash hash, uint32_t seed );
+// Prints the measured cycles/hash of 'hash' for keys of 'keysize' bytes.
+// NOTE(review): 'outCycles' looks like an output for the measured value -
+// confirm that the implementation actually fills it in.
+void TinySpeedTest ( pfHash hash, int hashsize, int keysize, uint32_t seed, bool verbose, double & outCycles );
+
+//-----------------------------------------------------------------------------
diff --git a/Spooky.cpp b/Spooky.cpp
new file mode 100644
index 0000000..47f5d75
--- /dev/null
+++ b/Spooky.cpp
@@ -0,0 +1,347 @@
+// Spooky Hash
+// A 128-bit noncryptographic hash, for checksums and table lookup
+// By Bob Jenkins.  Public domain.
+//   Oct 31 2010: published framework, disclaimer ShortHash isn't right
+//   Nov 7 2010: disabled ShortHash
+//   Oct 31 2011: replace End, ShortMix, ShortEnd, enable ShortHash again
+
+#include <memory.h>
+#include "Spooky.h"
+
+#define ALLOW_UNALIGNED_READS 1
+
+//
+// short hash ... it could be used on any message, 
+// but it's used by Spooky just for short messages.
+//
+// Parameters:
+//   message - bytes to hash
+//   length  - number of bytes in message
+//   hash1   - in: first seed,  out: first 64 bits of the result
+//   hash2   - in: second seed, out: second 64 bits of the result
+void SpookyHash::Short(
+    const void *message,
+    size_t length,
+    uint64 *hash1,
+    uint64 *hash2)
+{
+    // Scratch copy used only when unaligned reads are disallowed. It has to
+    // hold the whole message: Hash128() forwards any length below sc_bufSize
+    // (2*sc_numVars words), which a buffer of sc_numVars words would overflow.
+    uint64 buf[2*sc_numVars];
+    union
+    {
+        const uint8 *p8;
+        uint32 *p32;
+        uint64 *p64;
+        size_t i;
+    } u;
+
+    u.p8 = (const uint8 *)message;
+
+    if (!ALLOW_UNALIGNED_READS && (u.i & 0x7))
+    {
+        memcpy(buf, message, length);
+        u.p64 = buf;
+    }
+
+    size_t remainder = length%32;
+    uint64 a=*hash1;
+    uint64 b=*hash2;
+    uint64 c=sc_const;
+    uint64 d=sc_const;
+
+    if (length > 15)
+    {
+        const uint64 *end = u.p64 + (length/32)*4;
+
+        // handle all complete sets of 32 bytes
+        for (; u.p64 < end; u.p64 += 4)
+        {
+            c += u.p64[0];
+            d += u.p64[1];
+            ShortMix(a,b,c,d);
+            a += u.p64[2];
+            b += u.p64[3];
+        }
+
+        //Handle the case of 16+ remaining bytes.
+        if (remainder >= 16)
+        {
+            c += u.p64[0];
+            d += u.p64[1];
+            ShortMix(a,b,c,d);
+            u.p64 += 2;
+            remainder -= 16;
+        }
+    }
+
+    // Handle the last 0..15 bytes, and its length
+    // (d is assigned here, not accumulated; kept as is so hash values do not change)
+    d = ((uint64)length) << 56;
+    switch (remainder)
+    {
+    // Every case without a break deliberately falls through to the next one.
+    case 15:
+        d += ((uint64)u.p8[14]) << 48;
+    case 14:
+        d += ((uint64)u.p8[13]) << 40;
+    case 13:
+        d += ((uint64)u.p8[12]) << 32;
+    case 12:
+        d += u.p32[2];
+        c += u.p64[0];
+        break;
+    case 11:
+        d += ((uint64)u.p8[10]) << 16;
+    case 10:
+        d += ((uint64)u.p8[9]) << 8;
+    case 9:
+        d += (uint64)u.p8[8];
+    case 8:
+        c += u.p64[0];
+        break;
+    case 7:
+        c += ((uint64)u.p8[6]) << 48;
+    case 6:
+        c += ((uint64)u.p8[5]) << 40;
+    case 5:
+        c += ((uint64)u.p8[4]) << 32;
+    case 4:
+        c += u.p32[0];
+        break;
+    case 3:
+        c += ((uint64)u.p8[2]) << 16;
+    case 2:
+        c += ((uint64)u.p8[1]) << 8;
+    case 1:
+        c += (uint64)u.p8[0];
+        break;
+    case 0:
+        c += sc_const;
+        d += sc_const;
+    }
+    ShortEnd(a,b,c,d);
+    *hash1 = a;
+    *hash2 = b;
+}
+
+
+
+
+// do the whole hash in one call
+//
+// *hash1 and *hash2 are read as the two seeds and overwritten with the two
+// 64-bit halves of the result. Messages shorter than sc_bufSize are handed
+// to Short().
+void SpookyHash::Hash128(
+    const void *message, 
+    size_t length, 
+    uint64 *hash1, 
+    uint64 *hash2)
+{
+    if (length < sc_bufSize)
+    {
+        Short(message, length, hash1, hash2);
+        return;
+    }
+
+    uint64 h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11;
+    uint64 buf[sc_numVars];
+    uint64 *end;
+    union 
+    { 
+        const uint8 *p8; 
+        uint64 *p64; 
+        size_t i; 
+    } u;
+    size_t remainder;
+    
+    // seed the twelve state words: seed1, seed2 and sc_const, repeated four times
+    h0=h3=h6=h9  = *hash1;
+    h1=h4=h7=h10 = *hash2;
+    h2=h5=h8=h11 = sc_const;
+    
+    u.p8 = (const uint8 *)message;
+    // end marks the first byte after the last whole block
+    end = u.p64 + (length/sc_blockSize)*sc_numVars;
+
+    // handle all whole sc_blockSize blocks of bytes
+    if (ALLOW_UNALIGNED_READS || ((u.i & 0x7) == 0))
+    {
+        while (u.p64 < end)
+        { 
+            Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+	    u.p64 += sc_numVars;
+        }
+    }
+    else
+    {
+        // misaligned input: copy each block into the aligned buffer first
+        while (u.p64 < end)
+        {
+            memcpy(buf, u.p64, sc_blockSize);
+            Mix(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+	    u.p64 += sc_numVars;
+        }
+    }
+
+    // handle the last partial block of sc_blockSize bytes
+    // (zero padded, with the number of leftover bytes stored in its last byte)
+    remainder = (length - ((const uint8 *)end-(const uint8 *)message));
+    memcpy(buf, end, remainder);
+    memset(((uint8 *)buf)+remainder, 0, sc_blockSize-remainder);
+    ((uint8 *)buf)[sc_blockSize-1] = remainder;
+    Mix(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+    
+    // do some final mixing 
+    End(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+    *hash1 = h0;
+    *hash2 = h1;
+}
+
+
+
+// Reset the incremental state: the two seeds go into the first two state
+// words, and both the total length and the buffered byte count become zero.
+void SpookyHash::Init(uint64 seed1, uint64 seed2)
+{
+    m_state[0] = seed1;
+    m_state[1] = seed2;
+    m_remainder = 0;
+    m_length = 0;
+}
+
+
+// add a message fragment to the state
+//
+// Bytes are collected in m_data until at least sc_bufSize of them are
+// available; from then on whole blocks are mixed into the state and only the
+// trailing partial block is kept in m_data for the next call.
+void SpookyHash::Update(const void *message, size_t length)
+{
+    uint64 h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11;
+    size_t newLength = length + m_remainder;
+    uint8  remainder;
+    union 
+    { 
+        const uint8 *p8; 
+        uint64 *p64; 
+        size_t i; 
+    } u;
+    const uint64 *end;
+    
+    // Is this message fragment too short?  If it is, stuff it away.
+    if (newLength < sc_bufSize)
+    {
+        memcpy(&((uint8 *)m_data)[m_remainder], message, length);
+        m_length = length + m_length;
+        m_remainder = (uint8)newLength;
+        return;
+    }
+    
+    // init the variables
+    // (nothing has been mixed yet while the total is below sc_bufSize, so
+    // start from the seeds; otherwise resume from the saved state)
+    if (m_length < sc_bufSize)
+    {
+        h0=h3=h6=h9  = m_state[0];
+        h1=h4=h7=h10 = m_state[1];
+        h2=h5=h8=h11 = sc_const;
+    }
+    else
+    {
+        h0 = m_state[0];
+        h1 = m_state[1];
+        h2 = m_state[2];
+        h3 = m_state[3];
+        h4 = m_state[4];
+        h5 = m_state[5];
+        h6 = m_state[6];
+        h7 = m_state[7];
+        h8 = m_state[8];
+        h9 = m_state[9];
+        h10 = m_state[10];
+        h11 = m_state[11];
+    }
+    m_length = length + m_length;
+    
+    // if we've got anything stuffed away, use it now
+    if (m_remainder)
+    {
+        // top m_data up to a full sc_bufSize bytes, then mix both of its blocks
+        uint8 prefix = sc_bufSize-m_remainder;
+        memcpy(&(((uint8 *)m_data)[m_remainder]), message, prefix);
+        u.p64 = m_data;
+        Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+        Mix(&u.p64[sc_numVars], h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+        u.p8 = ((const uint8 *)message) + prefix;
+        length -= prefix;
+    }
+    else
+    {
+        u.p8 = (const uint8 *)message;
+    }
+    
+    // handle all whole blocks of sc_blockSize bytes
+    end = u.p64 + (length/sc_blockSize)*sc_numVars;
+    remainder = (uint8)(length-((const uint8 *)end-u.p8));
+    if (ALLOW_UNALIGNED_READS || (u.i & 0x7) == 0)
+    {
+        while (u.p64 < end)
+        { 
+            Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+	    u.p64 += sc_numVars;
+        }
+    }
+    else
+    {
+        // misaligned input: copy each block into m_data before mixing it
+        while (u.p64 < end)
+        { 
+            memcpy(m_data, u.p8, sc_blockSize);
+            Mix(m_data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+	    u.p64 += sc_numVars;
+        }
+    }
+
+    // stuff away the last few bytes
+    m_remainder = remainder;
+    memcpy(m_data, end, remainder);
+    
+    // stuff away the variables
+    m_state[0] = h0;
+    m_state[1] = h1;
+    m_state[2] = h2;
+    m_state[3] = h3;
+    m_state[4] = h4;
+    m_state[5] = h5;
+    m_state[6] = h6;
+    m_state[7] = h7;
+    m_state[8] = h8;
+    m_state[9] = h9;
+    m_state[10] = h10;
+    m_state[11] = h11;
+}
+
+
+// report the hash for the concatenation of all message fragments so far
+//
+// hash1 / hash2 receive the two 64-bit halves of the result.
+void SpookyHash::Final(uint64 *hash1, uint64 *hash2)
+{
+    // Short messages never went through Mix(): all their bytes are still in
+    // m_data and the seeds are still in m_state[0..1]. Short() takes its
+    // seeds from *hash1 / *hash2, so load them first instead of hashing with
+    // whatever the caller's output variables happen to contain.
+    if (m_length < sc_bufSize)
+    {
+        *hash1 = m_state[0];
+        *hash2 = m_state[1];
+        Short( m_data, m_length, hash1, hash2);
+        return;
+    }
+
+    const uint64 *data = (const uint64 *)m_data;
+    uint8 remainder = m_remainder;
+
+    uint64 h0 = m_state[0];
+    uint64 h1 = m_state[1];
+    uint64 h2 = m_state[2];
+    uint64 h3 = m_state[3];
+    uint64 h4 = m_state[4];
+    uint64 h5 = m_state[5];
+    uint64 h6 = m_state[6];
+    uint64 h7 = m_state[7];
+    uint64 h8 = m_state[8];
+    uint64 h9 = m_state[9];
+    uint64 h10 = m_state[10];
+    uint64 h11 = m_state[11];
+
+    if (remainder >= sc_blockSize)
+    {
+        // m_data can contain two blocks; handle any whole first block
+        Mix(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+        data += sc_numVars;
+        remainder -= sc_blockSize;
+    }
+
+    // mix in the last partial block, and the length mod sc_blockSize
+    // (the padding is written into m_data behind the buffered bytes)
+    memset(&((uint8 *)data)[remainder], 0, (sc_blockSize-remainder));
+
+    ((uint8 *)data)[sc_blockSize-1] = remainder;
+    Mix(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+
+    // do some final mixing
+    End(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+
+    *hash1 = h0;
+    *hash2 = h1;
+}
+
diff --git a/Spooky.h b/Spooky.h
new file mode 100644
index 0000000..047335d
--- /dev/null
+++ b/Spooky.h
@@ -0,0 +1,294 @@
+//
+// SpookyHash: a 128-bit noncryptographic hash function
+// By Bob Jenkins, public domain
+//   Oct 31 2010: alpha, framework + SpookyHash::Mix appears right
+//   Oct 31 2011: alpha again, Mix only good to 2^^69 but rest appears right
+//   Dec 31 2011: beta, improved Mix, tested it for 2-bit deltas
+//   Feb  2 2012: production, same bits as beta
+//   Feb  5 2012: adjusted definitions of uint* to be more portable
+// 
+// Up to 4 bytes/cycle for long messages.  Reasonably fast for short messages.
+// All 1 or 2 bit deltas achieve avalanche within 1% bias per output bit.
+//
+// This was developed for and tested on 64-bit x86-compatible processors.
+// It assumes the processor is little-endian.  There is a macro
+// controlling whether unaligned reads are allowed (by default they are).
+// This should be an equally good hash on big-endian machines, but it will
+// compute different results on them than on little-endian machines.
+//
+// Google's CityHash has similar specs to SpookyHash, and CityHash is faster
+// on some platforms.  MD4 and MD5 also have similar specs, but they are orders
+// of magnitude slower.  CRCs are two or more times slower, but unlike 
+// SpookyHash, they have nice math for combining the CRCs of pieces to form 
+// the CRCs of wholes.  There are also cryptographic hashes, but those are even 
+// slower than MD5.
+//
+
+#include "Platform.h"
+#include <stddef.h>
+
+#ifdef _MSC_VER
+# define INLINE __forceinline
+  typedef  unsigned __int64 uint64;
+  typedef  unsigned __int32 uint32;
+  typedef  unsigned __int16 uint16;
+  typedef  unsigned __int8  uint8;
+#else
+# include <stdint.h>
+# define INLINE inline
+  typedef  uint64_t  uint64;
+  typedef  uint32_t  uint32;
+  typedef  uint16_t  uint16;
+  typedef  uint8_t   uint8;
+#endif
+
+
+// SpookyHash offers one-shot static hashing (Hash128 / Hash64 / Hash32) and an
+// incremental interface (Init / Update / Final) that keeps its state in the
+// object. The mixing primitives are public static inline helpers.
+class SpookyHash
+{
+public:
+    //
+    // SpookyHash: hash a single message in one call, produce 128-bit output
+    //
+    static void Hash128(
+        const void *message,  // message to hash
+        size_t length,        // length of message in bytes
+        uint64 *hash1,        // in/out: in seed 1, out hash value 1
+        uint64 *hash2);       // in/out: in seed 2, out hash value 2
+
+    //
+    // Hash64: hash a single message in one call, return 64-bit output
+    //
+    static uint64 Hash64(
+        const void *message,  // message to hash
+        size_t length,        // length of message in bytes
+        uint64 seed)          // seed
+    {
+        // the same seed is used for both halves; only the first half is returned
+        uint64 hash1 = seed;
+        Hash128(message, length, &hash1, &seed);
+        return hash1;
+    }
+
+    //
+    // Hash32: hash a single message in one call, produce 32-bit output
+    //
+    static uint32 Hash32(
+        const void *message,  // message to hash
+        size_t length,        // length of message in bytes
+        uint32 seed)          // seed
+    {
+        // the result is the low 32 bits of the first 64-bit half
+        uint64 hash1 = seed, hash2 = seed;
+        Hash128(message, length, &hash1, &hash2);
+        return (uint32)hash1;
+    }
+
+    //
+    // Init: initialize the context of a SpookyHash
+    //
+    void Init(
+        uint64 seed1,       // any 64-bit value will do, including 0
+        uint64 seed2);      // different seeds produce independent hashes
+    
+    //
+    // Update: add a piece of a message to a SpookyHash state
+    //
+    void Update(
+        const void *message,  // message fragment
+        size_t length);       // length of message fragment in bytes
+
+
+    //
+    // Final: compute the hash for the current SpookyHash state
+    //
+    // This does not modify the state; you can keep updating it afterward
+    //
+    // The result is the same as if SpookyHash() had been called with
+    // all the pieces concatenated into one message.
+    //
+    void Final(
+        uint64 *hash1,    // out only: first 64 bits of hash value.
+        uint64 *hash2);   // out only: second 64 bits of hash value.
+
+    //
+    // left rotate a 64-bit value by k bits
+    // (k must be in 1..63: k == 0 would shift by the full 64 bits)
+    //
+    static INLINE uint64 Rot64(uint64 x, int k)
+    {
+        return (x << k) | (x >> (64 - k));
+    }
+
+    //
+    // This is used if the input is 96 bytes long or longer.
+    //
+    // The internal state is fully overwritten every 96 bytes.
+    // Every input bit appears to cause at least 128 bits of entropy
+    // before 96 other bytes are combined, when run forward or backward
+    //   For every input bit,
+    //   Two inputs differing in just that input bit
+    //   Where "differ" means xor or subtraction
+    //   And the base value is random
+    //   When run forward or backwards one Mix
+    // I tried 3 pairs of each; they all differed by at least 212 bits.
+    //
+    static INLINE void Mix(
+        const uint64 *data, 
+        uint64 &s0, uint64 &s1, uint64 &s2, uint64 &s3,
+        uint64 &s4, uint64 &s5, uint64 &s6, uint64 &s7,
+        uint64 &s8, uint64 &s9, uint64 &s10,uint64 &s11)
+    {
+      s0 += data[0];    s2 ^= s10;    s11 ^= s0;    s0 = Rot64(s0,11);    s11 += s1;
+      s1 += data[1];    s3 ^= s11;    s0 ^= s1;    s1 = Rot64(s1,32);    s0 += s2;
+      s2 += data[2];    s4 ^= s0;    s1 ^= s2;    s2 = Rot64(s2,43);    s1 += s3;
+      s3 += data[3];    s5 ^= s1;    s2 ^= s3;    s3 = Rot64(s3,31);    s2 += s4;
+      s4 += data[4];    s6 ^= s2;    s3 ^= s4;    s4 = Rot64(s4,17);    s3 += s5;
+      s5 += data[5];    s7 ^= s3;    s4 ^= s5;    s5 = Rot64(s5,28);    s4 += s6;
+      s6 += data[6];    s8 ^= s4;    s5 ^= s6;    s6 = Rot64(s6,39);    s5 += s7;
+      s7 += data[7];    s9 ^= s5;    s6 ^= s7;    s7 = Rot64(s7,57);    s6 += s8;
+      s8 += data[8];    s10 ^= s6;    s7 ^= s8;    s8 = Rot64(s8,55);    s7 += s9;
+      s9 += data[9];    s11 ^= s7;    s8 ^= s9;    s9 = Rot64(s9,54);    s8 += s10;
+      s10 += data[10];    s0 ^= s8;    s9 ^= s10;    s10 = Rot64(s10,22);    s9 += s11;
+      s11 += data[11];    s1 ^= s9;    s10 ^= s11;    s11 = Rot64(s11,46);    s10 += s0;
+    }
+
+    //
+    // Mix all 12 inputs together so that h0, h1 are a hash of them all.
+    //
+    // For two inputs differing in just the input bits
+    // Where "differ" means xor or subtraction
+    // And the base value is random, or a counting value starting at that bit
+    // The final result will have each bit of h0, h1 flip
+    // For every input bit,
+    // with probability 50 +- .3%
+    // For every pair of input bits,
+    // with probability 50 +- 3%
+    //
+    // This does not rely on the last Mix() call having already mixed some.
+    // Two iterations was almost good enough for a 64-bit result, but a
+    // 128-bit result is reported, so End() does three iterations.
+    //
+    static INLINE void EndPartial(
+        uint64 &h0, uint64 &h1, uint64 &h2, uint64 &h3,
+        uint64 &h4, uint64 &h5, uint64 &h6, uint64 &h7, 
+        uint64 &h8, uint64 &h9, uint64 &h10,uint64 &h11)
+    {
+        h11+= h1;    h2 ^= h11;   h1 = Rot64(h1,44);
+	h0 += h2;    h3 ^= h0;    h2 = Rot64(h2,15);
+	h1 += h3;    h4 ^= h1;    h3 = Rot64(h3,34);
+	h2 += h4;    h5 ^= h2;    h4 = Rot64(h4,21);
+	h3 += h5;    h6 ^= h3;    h5 = Rot64(h5,38);
+	h4 += h6;    h7 ^= h4;    h6 = Rot64(h6,33);
+	h5 += h7;    h8 ^= h5;    h7 = Rot64(h7,10);
+	h6 += h8;    h9 ^= h6;    h8 = Rot64(h8,13);
+	h7 += h9;    h10^= h7;    h9 = Rot64(h9,38);
+	h8 += h10;   h11^= h8;    h10= Rot64(h10,53);
+	h9 += h11;   h0 ^= h9;    h11= Rot64(h11,42);
+	h10+= h0;    h1 ^= h10;   h0 = Rot64(h0,54);
+    }
+
+    // Final mixing: three rounds of EndPartial over the twelve state words.
+    static INLINE void End(
+        uint64 &h0, uint64 &h1, uint64 &h2, uint64 &h3,
+        uint64 &h4, uint64 &h5, uint64 &h6, uint64 &h7, 
+        uint64 &h8, uint64 &h9, uint64 &h10,uint64 &h11)
+    {
+        EndPartial(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+        EndPartial(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+        EndPartial(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+    }
+
+    //
+    // The goal is for each bit of the input to expand into 128 bits of 
+    //   apparent entropy before it is fully overwritten.
+    // n trials both set and cleared at least m bits of h0 h1 h2 h3
+    //   n: 2   m: 29
+    //   n: 3   m: 46
+    //   n: 4   m: 57
+    //   n: 5   m: 107
+    //   n: 6   m: 146
+    //   n: 7   m: 152
+    // when run forwards or backwards
+    // for all 1-bit and 2-bit diffs
+    // with diffs defined by either xor or subtraction
+    // with a base of all zeros plus a counter, or plus another bit, or random
+    //
+    static INLINE void ShortMix(uint64 &h0, uint64 &h1, uint64 &h2, uint64 &h3)
+    {
+        h2 = Rot64(h2,50);  h2 += h3;  h0 ^= h2;
+        h3 = Rot64(h3,52);  h3 += h0;  h1 ^= h3;
+        h0 = Rot64(h0,30);  h0 += h1;  h2 ^= h0;
+        h1 = Rot64(h1,41);  h1 += h2;  h3 ^= h1;
+        h2 = Rot64(h2,54);  h2 += h3;  h0 ^= h2;
+        h3 = Rot64(h3,48);  h3 += h0;  h1 ^= h3;
+        h0 = Rot64(h0,38);  h0 += h1;  h2 ^= h0;
+        h1 = Rot64(h1,37);  h1 += h2;  h3 ^= h1;
+        h2 = Rot64(h2,62);  h2 += h3;  h0 ^= h2;
+        h3 = Rot64(h3,34);  h3 += h0;  h1 ^= h3;
+        h0 = Rot64(h0,5);   h0 += h1;  h2 ^= h0;
+        h1 = Rot64(h1,36);  h1 += h2;  h3 ^= h1;
+    }
+
+    //
+    // Mix all 4 inputs together so that h0, h1 are a hash of them all.
+    //
+    // For two inputs differing in just the input bits
+    // Where "differ" means xor or subtraction
+    // And the base value is random, or a counting value starting at that bit
+    // The final result will have each bit of h0, h1 flip
+    // For every input bit,
+    // with probability 50 +- .3% (it is probably better than that)
+    // For every pair of input bits,
+    // with probability 50 +- .75% (the worst case is approximately that)
+    //
+    static INLINE void ShortEnd(uint64 &h0, uint64 &h1, uint64 &h2, uint64 &h3)
+    {
+        h3 ^= h2;  h2 = Rot64(h2,15);  h3 += h2;
+        h0 ^= h3;  h3 = Rot64(h3,52);  h0 += h3;
+        h1 ^= h0;  h0 = Rot64(h0,26);  h1 += h0;
+        h2 ^= h1;  h1 = Rot64(h1,51);  h2 += h1;
+        h3 ^= h2;  h2 = Rot64(h2,28);  h3 += h2;
+        h0 ^= h3;  h3 = Rot64(h3,9);   h0 += h3;
+        h1 ^= h0;  h0 = Rot64(h0,47);  h1 += h0;
+        h2 ^= h1;  h1 = Rot64(h1,54);  h2 += h1;
+        h3 ^= h2;  h2 = Rot64(h2,32);  h3 += h2;
+        h0 ^= h3;  h3 = Rot64(h3,25);  h0 += h3;
+        h1 ^= h0;  h0 = Rot64(h0,63);  h1 += h0;
+    }
+    
+private:
+
+    //
+    // Short is used for messages under 192 bytes in length
+    // Short has a low startup cost, the normal mode is good for long
+    // keys, the cost crossover is at about 192 bytes.  The two modes were
+    // held to the same quality bar.
+    // 
+    static void Short(
+        const void *message,
+        size_t length,
+        uint64 *hash1,
+        uint64 *hash2);
+
+    // number of uint64's in internal state
+    static const size_t sc_numVars = 12;
+
+    // size of the internal state
+    static const size_t sc_blockSize = sc_numVars*8;
+
+    // size of buffer of unhashed data, in bytes
+    static const size_t sc_bufSize = 2*sc_blockSize;
+
+    //
+    // sc_const: a constant which:
+    //  * is not zero
+    //  * is odd
+    //  * is a not-very-regular mix of 1's and 0's
+    //  * does not need any other special mathematical properties
+    //
+    static const uint64 sc_const = 0xdeadbeefdeadbeefULL;
+
+    uint64 m_data[2*sc_numVars];   // unhashed data, for partial messages
+    uint64 m_state[sc_numVars];  // internal state of the hash
+    size_t m_length;             // total length of the input so far
+    uint8  m_remainder;          // length of unhashed data stashed in m_data
+};
+
+
+
diff --git a/SpookyTest.cpp b/SpookyTest.cpp
new file mode 100644
index 0000000..df9021e
--- /dev/null
+++ b/SpookyTest.cpp
@@ -0,0 +1,16 @@
+#include "Spooky.h"
+
+void SpookyHash32_test(const void *key, int len, uint32_t seed, void *out) {
+  *static_cast<uint32_t*>(out) = SpookyHash::Hash32(key, len, seed);  // result of Hash32 is stored at out as a uint32_t
+}
+
+void SpookyHash64_test(const void *key, int len, uint32_t seed, void *out) {
+  *static_cast<uint64_t*>(out) = SpookyHash::Hash64(key, len, seed);  // result of Hash64 is stored at out as a uint64_t
+}
+
+void SpookyHash128_test(const void *key, int len, uint32_t seed, void *out) {
+  uint64_t first = seed, second = seed;             // both words start out holding the seed
+  SpookyHash::Hash128(key, len, &first, &second);   // presumably seed in / hash out through the pointers - confirm in Spooky.h
+  uint64_t *words = static_cast<uint64_t*>(out);
+  words[0] = first;  words[1] = second;             // 128-bit result = two consecutive 64-bit words at out
+}
diff --git a/Stats.cpp b/Stats.cpp
new file mode 100644
index 0000000..4452290
--- /dev/null
+++ b/Stats.cpp
@@ -0,0 +1,99 @@
+#include "Stats.h"
+
+//-----------------------------------------------------------------------------
+
+// Binomial coefficient C(n,k) in floating point; uses C(n,k) == C(n,n-k) to keep the product short.
+double chooseK ( int n, int k )
+{
+  const int terms = (k > (n - k)) ? (n - k) : k;
+
+  double result = 1;
+  int step = 0;
+
+  while(step < terms)
+  {
+    result *= double(n - step) / double(step + 1);
+    step++;
+  }
+  return result;
+}
+
+// Sum of chooseK(n,i) for i = 1..k, added in ascending i (the i = 0 term is not included).
+double chooseUpToK ( int n, int k )
+{
+  double total = 0;
+  int i = 1;
+  while(i <= k)
+  {
+    total += chooseK(n,i++);
+  }
+  return total;
+}
+
+//-----------------------------------------------------------------------------
+// Distribution "score"
+// TODO - big writeup of what this score means
+
+// Basically, we're computing a constant X that says "The test distribution is
+// as uniform, RMS-wise, as a random distribution restricted to (1-X)*100
+// percent of the bins". This makes for a nice uniform way to rate a
+// distribution that isn't dependent on the number of bins or the number of keys
+
+// (as long as # keys > # bins * 3 or so, otherwise random fluctuations show up
+// as distribution weaknesses)
+
+// Distribution score for the per-bin counts bins[0..bincount), where
+// keycount is the number of keys that were binned. The result is scaled so
+// that 0 = good and 1 = bad.
+double calcScore ( const int * bins, const int bincount, const int keycount )
+{
+  const double binTotal = bincount;
+  const double keyTotal = keycount;
+
+  // root-mean-square of the bin counts
+  double sumSquares = 0;
+  const int * cursor = bins;
+
+  for(int left = bincount; left > 0; --left, ++cursor)
+  {
+    const double occupancy = *cursor;
+    sumSquares += occupancy * occupancy;
+  }
+
+  const double rms = sqrt(sumSquares / binTotal);
+
+  // fill factor derived from the rms
+  const double fill = (keyTotal*keyTotal - 1) / (binTotal*rms*rms - keyTotal);
+
+  // rescale to (0,1) with 0 = good, 1 = bad
+  return 1 - (fill / binTotal);
+}
+
+
+//----------------------------------------------------------------------------
+
+// Prints one character for a score: '.' when the score, as a truncated
+// percentage, is 0; the digit '1'..'9' for 1-9 percent; 'X' for anything
+// larger. Negative scores are treated as 0.
+void plot ( double n )
+{
+  double percent = n * 1;
+
+  if(percent < 0) percent = 0;
+  percent *= 100;
+  if(percent > 64) percent = 64;   // clamp before the conversion to int
+
+  const int level = (int)percent;
+
+  if(level == 0)
+  {
+    printf(".");
+    return;
+  }
+  // Levels 10..64 land past '9' and are shown as 'X'.
+  char glyph = '0' + char(level);
+  if(glyph > '9') glyph = 'X';
+  printf("%c",glyph);
+}
+
+//-----------------------------------------------------------------------------
diff --git a/Stats.h b/Stats.h
new file mode 100644
index 0000000..3565e80
--- /dev/null
+++ b/Stats.h
@@ -0,0 +1,388 @@
+#pragma once
+
+#include "Types.h"
+
+#include <math.h>
+#include <vector>
+#include <map>
+#include <algorithm>   // for std::sort
+#include <string.h>    // for memset
+#include <stdio.h>     // for printf
+
+double calcScore ( const int * bins, const int bincount, const int ballcount );
+
+void plot ( double n );
+
+inline double ExpectedCollisions ( double balls, double bins )
+{ const double stillEmpty = pow(1 - 1/bins,balls);   // chance that one given bin receives none of the balls
+  return balls - bins + bins * stillEmpty;           // = balls minus the expected number of occupied bins
+}
+
+double chooseK ( int b, int k );
+double chooseUpToK ( int n, int k );
+
+//-----------------------------------------------------------------------------
+
+// 32-bit integer mixer: xor-shifts by 16, 13 and 16 bits, interleaved with
+// multiplications by two fixed odd constants.
+inline uint32_t f3mix ( uint32_t k )
+{
+  uint32_t h = k ^ (k >> 16);
+  h = h * 0x85ebca6b;
+  h = (h ^ (h >> 13)) * 0xc2b2ae35;
+
+  return h ^ (h >> 16);
+}
+
+//-----------------------------------------------------------------------------
+// Sort the hash list, count the total number of collisions and return
+// the first N collisions for further processing
+
+template< typename hashtype >
+int FindCollisions ( std::vector<hashtype> & hashes, 
+                     HashSet<hashtype> & collisions,
+                     int maxCollisions )
+{
+  // Sorting (in place) puts equal hashes next to each other, so every
+  // element equal to its predecessor counts as one collision.
+  std::sort(hashes.begin(),hashes.end());
+
+  int duplicates = 0;
+
+  for(size_t cur = 1; cur < hashes.size(); ++cur)
+  {
+    const hashtype & value = hashes[cur];
+
+    if(!(value == hashes[cur-1])) continue;
+
+    ++duplicates;
+    // Values are recorded only while the set holds fewer than maxCollisions entries.
+    if((int)collisions.size() < maxCollisions) collisions.insert(value);
+  }
+
+  return duplicates;
+}
+
+//-----------------------------------------------------------------------------
+
+// Hashes every key with seed 0 and prints (via printbits) each key whose hash
+// was already produced by an earlier key, next to that earlier key.
+// Returns the number of such collisions reported.
+template < class keytype, typename hashtype >
+int PrintCollisions ( hashfunc<hashtype> hash, std::vector<keytype> & keys )
+{
+  int collcount = 0;
+
+  // First key seen for each hash value.
+  typedef std::map<hashtype,keytype> htab;
+  htab tab;
+
+  // Index 0 is included so that collisions with the first key are detected.
+  for(size_t i = 0; i < keys.size(); i++)
+  {
+    keytype & k1 = keys[i];
+    hashtype h = hash(&k1,sizeof(keytype),0);
+    typename htab::iterator it = tab.find(h);
+
+    if(it == tab.end())
+    {
+      tab.insert( std::make_pair(h,k1) );
+      continue;
+    }
+
+    collcount++;   // without this the function could only ever return 0
+    printf("A: ");
+    printbits(&k1,sizeof(keytype));
+    printf("B: ");
+    printbits(&(*it).second,sizeof(keytype));
+  }
+  return collcount;
+}
+
+//----------------------------------------------------------------------------
+// Measure the distribution "score" for each possible N-bit span up to 20 bits
+
+// For every starting bit, bins the hashes by the maxwidth-bit window at that
+// position, scores the bins, then repeatedly folds the bins in half and
+// re-scores down to 8-bit windows. Prints and returns the worst score seen.
+template< typename hashtype >
+double TestDistribution ( std::vector<hashtype> & hashes, bool drawDiagram )
+{
+  printf("Testing distribution - ");
+  if(drawDiagram) printf("\n");
+
+  const int hashbits = sizeof(hashtype) * 8;
+  int maxwidth = 20;
+
+  // We need at least 5 keys per bin to reliably test distribution biases
+  // down to 1%, so don't bother to test sparser distributions than that.
+  // The maxwidth > 0 guard keeps the shift defined when there are fewer than
+  // 5 hashes, in which case the ratio can never reach 5.
+  while((maxwidth > 0) && (double(hashes.size()) / double(1 << maxwidth) < 5.0))
+  {
+    maxwidth--;
+  }
+
+  std::vector<int> bins;
+  bins.resize(1 << maxwidth);
+
+  double worst = 0;
+  int worstStart = -1;
+  int worstWidth = -1;
+
+  for(int start = 0; start < hashbits; start++)
+  {
+    int width = maxwidth;
+    int bincount = (1 << width);
+
+    memset(&bins[0],0,sizeof(int)*bincount);
+
+    for(size_t j = 0; j < hashes.size(); j++)
+    {
+      hashtype & hash = hashes[j];
+      uint32_t index = window(&hash,sizeof(hash),start,width);
+      bins[index]++;
+    }
+
+    // Test the distribution, then fold the bins in half,
+    // repeat until we're down to 256 bins
+
+    if(drawDiagram) printf("[");
+
+    while(bincount >= 256)
+    {
+      double n = calcScore(&bins[0],bincount,(int)hashes.size());
+
+      if(drawDiagram) plot(n);
+
+      if(n > worst)
+      {
+        worst = n;
+        worstStart = start;
+        worstWidth = width;
+      }
+
+      width--;
+      bincount /= 2;
+
+      if(width < 8) break;
+
+      for(int i = 0; i < bincount; i++)
+      {
+        bins[i] += bins[i+bincount];
+      }
+    }
+
+    if(drawDiagram) printf("]\n");
+  }
+
+  double pct = worst * 100.0;
+
+  printf("Worst bias is the %3d-bit window at bit %3d - %5.3f%%",worstWidth,worstStart,pct);
+  if(pct >= 1.0) printf(" !!!!! ");
+  printf("\n");
+
+  return worst;
+}
+
+//----------------------------------------------------------------------------
+
+// Runs the collision test (always) and the distribution test (if testDist) on
+// a list of hashes. hashes is sorted in place; up to 1000 distinct colliding
+// values are appended to collisions. Returns false when the collision test fails.
+template < typename hashtype >
+bool TestHashList ( std::vector<hashtype> & hashes, std::vector<hashtype> & collisions, bool testDist, bool drawDiagram )
+{
+  bool result = true;
+
+  {
+    size_t count = hashes.size();
+    // count*(count-1)/2 pairs, each colliding with probability 2^-bits
+    double expected = (double(count) * double(count-1)) / pow(2.0,double(sizeof(hashtype) * 8 + 1));
+    printf("Testing collisions   - Expected %8.2f, ",expected);
+
+    // The local set has its own name: calling it "collisions" would hide the
+    // output parameter, which would then never receive anything.
+    HashSet<hashtype> found;
+    double collcount = FindCollisions(hashes,found,1000);
+    collisions.insert(collisions.end(),found.begin(),found.end());
+
+    printf("actual %8.2f (%5.2fx)",collcount, collcount / expected);
+
+    if(sizeof(hashtype) == sizeof(uint32_t))
+    {
+      // 2x expected collisions = fail
+
+      // #TODO - collision failure cutoff needs to be expressed as a standard deviation instead
+      // of a scale factor, otherwise we fail erroneously if there are a small expected number
+      // of collisions
+
+      if(double(collcount) / double(expected) > 2.0)
+      {
+        printf(" !!!!! ");
+        result = false;
+      }
+    }
+    else
+    {
+      // For all hashes larger than 32 bits, _any_ collisions are a failure.
+      if(collcount > 0)
+      {
+        printf(" !!!!! ");
+        result = false;
+      }
+    }
+
+    printf("\n");
+  }
+
+  //----------
+
+  if(testDist)
+  {
+    TestDistribution(hashes,drawDiagram);
+  }
+
+  return result;
+}
+
+//----------
+
+template < typename hashtype >
+bool TestHashList ( std::vector<hashtype> & hashes, bool /*testColl*/, bool testDist, bool drawDiagram )
+{
+  // testColl is unused: forwards to the vector-taking overload with a scratch collisions vector.
+  std::vector<hashtype> scratch;
+  return TestHashList(hashes,scratch,testDist,drawDiagram);
+}
+
+//-----------------------------------------------------------------------------
+
+// Hashes every key with seed 0 (printing progress dots), then runs
+// TestHashList on the resulting hashes and returns its verdict.
+template < class keytype, typename hashtype >
+bool TestKeyList ( hashfunc<hashtype> hash, std::vector<keytype> & keys, bool testColl, bool testDist, bool drawDiagram )
+{
+  int keycount = (int)keys.size();
+
+  std::vector<hashtype> hashes;
+  hashes.resize(keycount);
+
+  // A progress dot every keycount / 10 keys. With fewer than 10 keys that
+  // quotient is 0 and the modulo below would divide by zero, so use 1.
+  const int dotStep = (keycount >= 10) ? (keycount / 10) : 1;
+  printf("Hashing");
+
+  for(int i = 0; i < keycount; i++)
+  {
+    if(i % dotStep == 0) printf(".");
+
+    keytype & k = keys[i];
+    hash(&k,sizeof(k),0,&hashes[i]);
+  }
+
+  printf("\n");
+  bool result = TestHashList(hashes,testColl,testDist,drawDiagram);
+  printf("\n");
+  return result;
+}
+
+//-----------------------------------------------------------------------------
+// Bytepair test - generate 16-bit indices from all possible non-overlapping
+// 8-bit sections of the hash value, check distribution on all of them.
+
+// This is a very good test for catching weak intercorrelations between bits - 
+// much harder to pass than the normal distribution test. However, it doesn't
+// really model the normal usage of hash functions in hash table lookup, so
+// I'm not sure it's that useful (and hash functions that fail this test but
+// pass the normal distribution test still work well in practice)
+
+// For every pair of bit offsets (a,b): forms a 16-bit index from the 8-bit
+// windows of each hash at a (low byte) and b (high byte), counts the hashes
+// per index and scores the counts. Returns the worst score found.
+template < typename hashtype >
+double TestDistributionBytepairs ( std::vector<hashtype> & hashes, bool drawDiagram )
+{
+  const int nbytes = sizeof(hashtype);
+  const int hashbits = nbytes * 8;
+  const int nbins = 65536;
+
+  std::vector<int> bins(nbins,0);
+  double worst = 0;
+
+  for(int a = 0; a < hashbits; a++)
+  {
+    if(drawDiagram) if((a % 8 == 0) && (a > 0)) printf("\n");
+    if(drawDiagram) printf("[");
+
+    for(int b = 0; b < hashbits; b++)
+    {
+      if(drawDiagram) if((b % 8 == 0) && (b > 0)) printf(" ");
+
+      bins.clear();
+      bins.resize(nbins,0);
+
+      for(size_t i = 0; i < hashes.size(); i++)
+      {
+        hashtype & hash = hashes[i];
+        uint32_t pa = window(&hash,sizeof(hash),a,8);
+        uint32_t pb = window(&hash,sizeof(hash),b,8);
+        bins[pa | (pb << 8)]++;
+      }
+
+      // calcScore takes a raw int array and int counts; handing it the
+      // vector object itself does not compile once the template is instantiated.
+      double s = calcScore(&bins[0],(int)bins.size(),(int)hashes.size());
+
+      if(drawDiagram) plot(s);
+
+      if(s > worst)
+      {
+        worst = s;
+      }
+    }
+
+    if(drawDiagram) printf("]\n");
+  }
+
+  return worst;
+}
+
+//-----------------------------------------------------------------------------
+// Simplified test - only check 64k distributions, and only on byte boundaries
+
+// Quick distribution check: bins the hashes by the 16-bit window starting at
+// every byte boundary and scores each set of bins.
+// Outputs: dworst receives the highest score, davg the mean over the windows.
+template < typename hashtype >
+void TestDistributionFast ( std::vector<hashtype> & hashes, double & dworst, double & davg )
+{
+  const int hashbits = sizeof(hashtype) * 8;
+  const int nbins = 65536;
+
+  std::vector<int> bins(nbins,0);
+
+  dworst = -1.0e90;   // very low starting value for the running maximum
+  davg = 0;
+
+  for(int start = 0; start < hashbits; start += 8)
+  {
+    bins.assign(nbins,0);
+
+    typename std::vector<hashtype>::iterator it = hashes.begin();
+
+    for(; it != hashes.end(); ++it)
+    {
+      const uint32_t slot = window(&*it,sizeof(*it),start,16);
+      bins[slot]++;
+    }
+
+    const double score = calcScore(&bins.front(),(int)bins.size(),(int)hashes.size());
+
+    davg += score;
+    if(score > dworst) dworst = score;
+  }
+
+  davg /= double(hashbits/8);
+}
+
+//-----------------------------------------------------------------------------
diff --git a/SuperFastHash.cpp b/SuperFastHash.cpp
new file mode 100644
index 0000000..1f6d39a
--- /dev/null
+++ b/SuperFastHash.cpp
@@ -0,0 +1,76 @@
+#include "Platform.h"
+#include <stdio.h> // for NULL
+
+/* By Paul Hsieh (C) 2004, 2005.  Covered under the Paul Hsieh derivative 
+   license. See: 
+   http://www.azillionmonkeys.com/qed/weblicense.html for license details.
+
+   http://www.azillionmonkeys.com/qed/hash.html */
+
+/*
+#undef get16bits
+#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
+  || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
+#define get16bits(d) (*((const uint16_t *) (d)))
+#endif
+
+#if !defined (get16bits)
+#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\
+                       +(uint32_t)(((const uint8_t *)(d))[0]) )
+#endif
+*/
+
+FORCE_INLINE uint16_t get16bits ( const void * p )
+{ // Reads the 16-bit value stored at p directly through a uint16_t pointer (native byte order, no alignment handling).
+  return static_cast<const uint16_t*>(p)[0];
+}
+
+/* Paul Hsieh's SuperFastHash of data[0..len). Returns 0 when len <= 0 or data is NULL. */
+uint32_t SuperFastHash (const signed char * data, int len) {
+uint32_t hash = 0, tmp;
+int rem;
+  if (len <= 0 || data == NULL) return 0;
+
+  rem = len & 3;   /* bytes left over after the 4-byte rounds (0-3) */
+  len >>= 2;       /* from here on len counts 4-byte rounds */
+
+  /* Main loop */
+  for (;len > 0; len--) {
+    hash  += get16bits (data);
+    tmp    = (get16bits (data+2) << 11) ^ hash;
+    hash   = (hash << 16) ^ tmp;
+    data  += 2*sizeof (uint16_t);
+    hash  += hash >> 11;
+  }
+
+  /* Handle end cases */
+  switch (rem) {
+    case 3:	hash += get16bits (data);
+        hash ^= hash << 16;
+        hash ^= data[sizeof (uint16_t)] << 18;   /* data is signed char: this byte is sign-extended before the shift */
+        hash += hash >> 11;
+        break;
+    case 2:	hash += get16bits (data);
+        hash ^= hash << 11;
+        hash += hash >> 17;
+        break;
+    case 1: hash += *data;                       /* likewise sign-extended before being added */
+        hash ^= hash << 10;
+        hash += hash >> 1;
+  }
+
+  /* Force "avalanching" of final 127 bits */
+  hash ^= hash << 3;
+  hash += hash >> 5;
+  hash ^= hash << 4;
+  hash += hash >> 17;
+  hash ^= hash << 25;
+  hash += hash >> 6;
+
+  return hash;
+}
+
+void SuperFastHash     ( const void * key, int len, uint32_t /*seed*/, void * out )
+{ // Adapter with the four-argument hash signature: the seed is ignored, the 32-bit hash is stored at out.
+  *static_cast<uint32_t*>(out) = SuperFastHash(static_cast<const signed char*>(key),len);
+}
diff --git a/Types.cpp b/Types.cpp
new file mode 100644
index 0000000..6ad5312
--- /dev/null
+++ b/Types.cpp
@@ -0,0 +1,148 @@
+#include "Types.h"
+
+#include "Random.h"
+
+#include <stdio.h>
+
+uint32_t MurmurOAAT ( const void * blob, int len, uint32_t seed );
+
+//-----------------------------------------------------------------------------
+
+#if defined(_MSC_VER)
+#pragma optimize( "", off )
+#endif
+
+void blackhole ( uint32_t )   // data sink: accepts a value and does nothing with it
+{                             // (defined between the MSVC "optimize off/on" pragmas so the call is not optimized away there)
+}
+
+uint32_t whitehole ( void )   // data source: always returns 0
+{                             // (defined between the MSVC "optimize off/on" pragmas so callers cannot fold the constant there)
+  return 0;
+}
+
+#if defined(_MSC_VER)
+#pragma optimize( "", on ) 
+#endif
+
+uint32_t g_verify = 1;
+
+void MixVCode ( const void * blob, int len )
+{ // Folds blob into the global verification code: the current code is the seed, the hash becomes the new code.
+  g_verify = MurmurOAAT(blob, len, /*seed*/ g_verify);
+}
+
+//-----------------------------------------------------------------------------
+
+// Primality test for 32-bit values, by trial division.
+//
+// Candidate divisors stop once d*d exceeds x, so a number is never rejected
+// for being divisible by itself: small primes such as 2, 251 or 257 are
+// reported as prime, and 0 and 1 are reported as not prime.
+// Returns true exactly when x is prime.
+bool isprime ( uint32_t x )
+{
+  // 0 and 1 are not prime; 2 and 3 are.
+  if(x < 2) return false;
+  if(x < 4) return true;
+
+  if((x % 2) == 0) return false;
+
+  // Only odd divisors remain. d is 64-bit so that d*d cannot wrap around
+  // for x close to 2^32.
+  for(uint64_t d = 3; d * d <= x; d += 2)
+  {
+    if((x % d) == 0)
+    {
+      return false;
+    }
+  }
+
+  // No divisor up to the square root of x.
+  return true;
+}
+
+// Searches random 32-bit values for candidate mixing constants. Prints the
+// first 100 values that pass every filter below (each next to its bitwise
+// complement), then one line of counters: trials, rejections per filter, accepted.
+void GenerateMixingConstants ( void )
+{
+  Rand r(8350147);
+
+  int trials = 0;
+  int popfail = 0;
+  int bitfail = 0;
+  int matchfail = 0;
+  int primefail = 0;
+  int count = 0;
+
+  while(count < 100)
+  {
+    trials++;
+
+    const uint32_t b = r.rand_u32();
+
+    //----------
+    // must have exactly 16 set bits
+
+    if(popcount(b) != 16)
+    {
+      popfail++;
+      continue;
+    }
+
+    //----------
+    // must have 3-5 bits set per 8-bit window (the low byte of each of
+    // the 32 rotations of b)
+
+    bool windowsBalanced = true;
+
+    for(int i = 0; windowsBalanced && (i < 32); i++)
+    {
+      const uint32_t c = ROTL32(b,i) & 0xFF;
+      windowsBalanced = !((popcount(c) < 3) || (popcount(c) > 5));
+    }
+
+    if(!windowsBalanced)
+    {
+      bitfail++;
+      continue;
+    }
+
+    //----------
+    // all 8-bit windows must be different
+
+    uint8_t seen[256] = { 0 };
+    bool windowsDistinct = true;
+
+    for(int i = 0; windowsDistinct && (i < 32); i++)
+    {
+      const uint32_t c = ROTL32(b,i) & 0xFF;
+
+      if(seen[c]) windowsDistinct = false;
+      seen[c] = 1;
+    }
+
+    if(!windowsDistinct)
+    {
+      matchfail++;
+      continue;
+    }
+
+    //----------
+    // must be prime
+
+    if(!isprime(b))
+    {
+      primefail++;
+      continue;
+    }
+
+    printf("0x%08x : 0x%08x\n",b,~b);
+    count++;
+  }
+
+  printf("%d %d %d %d %d %d\n",trials,popfail,bitfail,matchfail,primefail,count);
+}
+
+//-----------------------------------------------------------------------------
diff --git a/Types.h b/Types.h
new file mode 100644
index 0000000..91e7206
--- /dev/null
+++ b/Types.h
@@ -0,0 +1,374 @@
+#pragma once
+
+#include "Platform.h"
+#include "Bitvec.h"
+
+#include <memory.h>
+#include <vector>
+#include <map>
+#include <set>
+
+//-----------------------------------------------------------------------------
+// If the optimizer detects that a value in a speed test is constant or unused,
+// the optimizer may remove references to it or otherwise create code that
+// would not occur in a real-world application. To prevent the optimizer from
+// doing this we declare two trivial functions that either sink or source data,
+// and bar the compiler from optimizing them.
+
+void     blackhole ( uint32_t x );
+uint32_t whitehole ( void );
+
+//-----------------------------------------------------------------------------
+// We want to verify that every test produces the same result on every platform.
+// To do this, we hash the results of every test to produce an overall
+// verification value for the whole test suite. If two runs produce the same
+// verification value, then every test in both runs produced the same results.
+
+extern uint32_t g_verify;
+
+// Mix the given blob of data into the verification code
+
+void MixVCode ( const void * blob, int len );
+
+
+//-----------------------------------------------------------------------------
+
+typedef void (*pfHash) ( const void * blob, const int len, const uint32_t seed, void * out );
+
+// Owning byte-for-byte copy of key[0..len); left empty when key is NULL or len <= 0.
+struct ByteVec : public std::vector<uint8_t>
+{
+  ByteVec ( const void * key, int len )
+  { // front() on an empty vector is undefined, so copy with assign() and skip empty input altogether
+    if(key && (len > 0)) assign((const uint8_t*)key,(const uint8_t*)key + len);
+  }
+};
+
+template< typename hashtype, typename keytype >
+struct CollisionMap : public std::map< hashtype, std::vector<keytype> >   // hash value -> list of keys filed under it
+{                                                                         // adds no members of its own
+};
+
+template< typename hashtype >
+struct HashSet : public std::set<hashtype>   // ordered set of hash values
+{                                            // adds no members of its own
+};
+
+//-----------------------------------------------------------------------------
+
+// Thin wrapper that gives a raw pfHash a typed interface; T is the hash
+// value type that the wrapped function writes.
+template < class T >
+class hashfunc
+{
+public:
+
+  hashfunc ( pfHash h ) : m_hash(h) {}
+
+  // Writes the hash of key[0..len) to out. out is a void* (like pfHash's own
+  // out parameter) so that pointers to hash types of any width are accepted.
+  inline void operator () ( const void * key, const int len, const uint32_t seed, void * out )
+  {
+    m_hash(key,len,seed,out);
+  }
+
+  inline operator pfHash ( void ) const
+  {
+    return m_hash;
+  }
+
+  inline T operator () ( const void * key, const int len, const uint32_t seed ) 
+  {
+    T result;
+    m_hash(key,len,seed,&result);
+    return result;
+  }
+
+  pfHash m_hash;
+};
+
+//-----------------------------------------------------------------------------
+// Key-processing callback objects. Simplifies keyset testing a bit.
+
+// Base class for per-key callbacks. The default operator() only counts its
+// calls; the default reserve() does nothing.
+struct KeyCallback
+{
+  KeyCallback() : m_count(0) {}
+
+  virtual ~KeyCallback() {}
+
+  // Default handler: ignores the key and bumps the call counter.
+  virtual void operator() ( const void * key, int len )
+  {
+    ++m_count;
+  }
+
+  // Hook taking an expected key count; nothing to prepare in the base class.
+  virtual void reserve ( int keycount )
+  {
+  }
+
+  int m_count;   // number of calls made to the base operator()
+};
+
+//----------
+
+// KeyCallback that hashes every key (seed 0) and appends the result to a
+// vector held by reference; that vector is emptied on construction.
+template<typename hashtype>
+struct HashCallback : public KeyCallback
+{
+  typedef std::vector<hashtype> hashvec;
+
+  HashCallback ( pfHash hash, hashvec & hashes ) : m_hashes(hashes), m_pfHash(hash)
+  {
+    m_hashes.clear();
+  }
+
+  // Grows the vector by one slot and has the hash function write into it.
+  virtual void operator () ( const void * key, int len )
+  {
+    m_hashes.resize(m_hashes.size() + 1);
+    hashtype & slot = m_hashes.back();
+    m_pfHash(key,len,0,&slot);
+  }
+
+  virtual void reserve ( int keycount )
+  {
+    m_hashes.reserve(keycount);
+  }
+
+  hashvec & m_hashes;   // destination for the hashes
+  pfHash m_pfHash;      // hash function applied to every key
+
+private:
+
+  // Declared private so that the callback cannot be assigned.
+  HashCallback & operator = ( const HashCallback & );
+};
+
+//----------
+
+// KeyCallback that hashes every key (seed 0) and, when the hash is present in
+// the given collision set, files a copy of the key under that hash.
+template<typename hashtype>
+struct CollisionCallback : public KeyCallback
+{
+  typedef HashSet<hashtype> hashset;
+  typedef CollisionMap<hashtype,ByteVec> collmap;
+
+  CollisionCallback ( pfHash hash, hashset & collisions, collmap & cmap ) 
+  : m_pfHash(hash), 
+    m_collisions(collisions),
+    m_collmap(cmap)
+  {
+  }
+
+  virtual void operator () ( const void * key, int len )
+  {
+    hashtype keyHash;
+    m_pfHash(key,len,0,&keyHash);
+
+    // Keys whose hash is not in the collision set are of no interest.
+    if(m_collisions.find(keyHash) == m_collisions.end()) return;
+
+    m_collmap[keyHash].push_back( ByteVec(key,len) );
+  }
+
+  pfHash m_pfHash;          // hash function applied to every key
+  hashset & m_collisions;   // hash values to look for
+  collmap & m_collmap;      // output: hash value -> copies of the keys that produced it
+
+private:
+
+  // Declared private so that the callback cannot be assigned.
+  CollisionCallback & operator = ( const CollisionCallback & c );
+};
+
+//-----------------------------------------------------------------------------
+
+// Fixed-size value of _bits bits, stored as (_bits+7)/8 raw bytes. Provides the
+// comparison, bitwise and shift operators that the tests use on hash values.
+// The integer constructors copy the integer's in-memory bytes into bytes[0..].
+template < int _bits >
+class Blob
+{
+public:
+
+  Blob()
+  {
+    for(size_t i = 0; i < sizeof(bytes); i++)
+    {
+      bytes[i] = 0;
+    }
+  }
+
+  Blob ( int x )
+  {
+    // set() copies at most sizeof(bytes) bytes and zero-fills the rest, so a
+    // Blob narrower than an int is not overrun (a direct int store would be).
+    set(&x,sizeof(x));
+  }
+
+  Blob ( const Blob & k )
+  {
+    for(size_t i = 0; i < sizeof(bytes); i++)
+    {
+      bytes[i] = k.bytes[i];
+    }
+  }
+
+  Blob & operator = ( const Blob & k )
+  {
+    for(size_t i = 0; i < sizeof(bytes); i++)
+    {
+      bytes[i] = k.bytes[i];
+    }
+
+    return *this;
+  }
+
+  Blob ( uint64_t a, uint64_t b )
+  {
+    uint64_t t[2] = {a,b};
+    set(&t,16);
+  }
+
+  void set ( const void * blob, size_t len )
+  {
+    const uint8_t * k = (const uint8_t*)blob;
+
+    len = len > sizeof(bytes) ? sizeof(bytes) : len;
+
+    for(size_t i = 0; i < len; i++)
+    {
+      bytes[i] = k[i];
+    }
+
+    for(size_t i = len; i < sizeof(bytes); i++)
+    {
+      bytes[i] = 0;
+    }
+  }
+
+  uint8_t & operator [] ( int i )
+  {
+    return bytes[i];
+  }
+
+  const uint8_t & operator [] ( int i ) const
+  {
+    return bytes[i];
+  }
+
+  //----------
+  // boolean operations
+  
+  bool operator < ( const Blob & k ) const
+  {
+    for(size_t i = 0; i < sizeof(bytes); i++)
+    {
+      if(bytes[i] < k.bytes[i]) return true;
+      if(bytes[i] > k.bytes[i]) return false;
+    }
+
+    return false;
+  }
+
+  bool operator == ( const Blob & k ) const
+  {
+    for(size_t i = 0; i < sizeof(bytes); i++)
+    {
+      if(bytes[i] != k.bytes[i]) return false;
+    }
+
+    return true;
+  }
+
+  bool operator != ( const Blob & k ) const
+  {
+    return !(*this == k);
+  }
+
+  //----------
+  // bitwise operations
+
+  Blob operator ^ ( const Blob & k ) const 
+  {
+    Blob t;
+    for(size_t i = 0; i < sizeof(bytes); i++)
+    {
+      t.bytes[i] = bytes[i] ^ k.bytes[i];
+    }
+    return t;
+  }
+
+  Blob & operator ^= ( const Blob & k )
+  {
+    for(size_t i = 0; i < sizeof(bytes); i++)
+    {
+      bytes[i] ^= k.bytes[i];
+    }
+
+    return *this;
+  }
+
+  int operator & ( int x ) const
+  {
+    // Assemble the leading bytes into a local int rather than reading an int
+    // through bytes directly: no over-read for narrow Blobs, no alignment issue.
+    int head = 0;
+    memcpy(&head,bytes,sizeof(head) < sizeof(bytes) ? sizeof(head) : sizeof(bytes));
+    return head & x;
+  }
+
+  Blob & operator &= ( const Blob & k )
+  {
+    for(size_t i = 0; i < sizeof(bytes); i++)
+    {
+      bytes[i] &= k.bytes[i];
+    }
+
+    return *this;   // required: flowing off the end of a non-void function is undefined behaviour
+  }
+
+  Blob operator << ( int c ) const
+  {
+    Blob t = *this;
+    lshift(&t.bytes[0],sizeof(bytes),c);
+    return t;
+  }
+
+  Blob operator >> ( int c ) const
+  {
+    Blob t = *this;
+    rshift(&t.bytes[0],sizeof(bytes),c);
+    return t;
+  }
+
+  Blob & operator <<= ( int c )
+  {
+    lshift(&bytes[0],sizeof(bytes),c);
+
+    return *this;
+  }
+
+  Blob & operator >>= ( int c )
+  {
+    rshift(&bytes[0],sizeof(bytes),c);
+
+    return *this;
+  }
+
+  //----------
+  
+private:
+
+  uint8_t bytes[(_bits+7)/8];
+};
+
+typedef Blob<128> uint128_t;
+typedef Blob<256> uint256_t;
+
+//-----------------------------------------------------------------------------
diff --git a/crc.cpp b/crc.cpp
new file mode 100644
index 0000000..d4d6b84
--- /dev/null
+++ b/crc.cpp
@@ -0,0 +1,100 @@
+#include "Platform.h"
+
+/*
+ * This file is derived from crc32.c from the zlib-1.1.3 distribution
+ * by Jean-loup Gailly and Mark Adler.
+ */
+
+/* crc32.c -- compute the CRC-32 of a data stream
+ * Copyright (C) 1995-1998 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+
+/* ========================================================================
+ * Table of CRC-32's of all single-byte values (made by make_crc_table)
+ */
+static const uint32_t crc_table[256] = {
+  0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
+  0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
+  0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
+  0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
+  0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
+  0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
+  0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
+  0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
+  0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
+  0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
+  0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
+  0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
+  0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
+  0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
+  0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
+  0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
+  0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
+  0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
+  0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
+  0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
+  0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
+  0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
+  0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
+  0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
+  0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,
+  0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
+  0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
+  0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
+  0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
+  0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
+  0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
+  0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
+  0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
+  0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
+  0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
+  0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
+  0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
+  0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
+  0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
+  0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
+  0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
+  0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
+  0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
+  0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
+  0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
+  0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
+  0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
+  0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
+  0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
+  0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
+  0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
+  0x2d02ef8dL
+};
+
+/* ========================================================================= */
+
+#define DO1(buf) crc = crc_table[((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8);
+#define DO2(buf)  DO1(buf); DO1(buf);
+#define DO4(buf)  DO2(buf); DO2(buf);
+#define DO8(buf)  DO4(buf); DO4(buf);
+
+/* ========================================================================= */
+
+// Table-driven CRC-32 of key[0..len), seeded with seed; the result is stored at out as a uint32_t.
+void crc32 ( const void * key, int len, uint32_t seed, void * out )
+{
+  const uint8_t * buf = (const uint8_t*)key;
+  uint32_t crc = seed ^ 0xffffffffL;   // the running value is complemented on the way in ...
+
+  while (len >= 8)
+  {
+    DO8(buf);
+    len -= 8;
+  }
+
+  // "> 0" rather than "non-zero": a negative len falls through instead of counting down until the int overflows.
+  while(len-- > 0)
+  {
+    DO1(buf);
+  }
+  crc ^= 0xffffffffL;                  // ... and again on the way out
+  *(uint32_t*)out = crc;
+}
diff --git a/lookup3.cpp b/lookup3.cpp
new file mode 100644
index 0000000..63f00f8
--- /dev/null
+++ b/lookup3.cpp
@@ -0,0 +1,72 @@
+// lookup3 by Bob Jenkins, code is public domain.
+
+#include "Platform.h"
+
+#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
+
+#define mix(a,b,c) \
+{ \
+  a -= c;  a ^= rot(c, 4);  c += b; \
+  b -= a;  b ^= rot(a, 6);  a += c; \
+  c -= b;  c ^= rot(b, 8);  b += a; \
+  a -= c;  a ^= rot(c,16);  c += b; \
+  b -= a;  b ^= rot(a,19);  a += c; \
+  c -= b;  c ^= rot(b, 4);  b += a; \
+}
+
+#define final(a,b,c) \
+{ \
+  c ^= b; c -= rot(b,14); \
+  a ^= c; a -= rot(c,11); \
+  b ^= a; b -= rot(a,25); \
+  c ^= b; c -= rot(b,16); \
+  a ^= c; a -= rot(c,4);  \
+  b ^= a; b -= rot(a,14); \
+  c ^= b; c -= rot(b,24); \
+}
+
+// lookup3 hash of key[0..length) with seed initval; returns the c word of the (a,b,c) state.
+uint32_t lookup3 ( const void * key, int length, uint32_t initval )
+{
+  uint32_t a,b,c;                                          /* internal state */
+  // all three words start from the same value: constant + length + seed
+  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
+  // key is read through a uint32_t pointer - assumes callers pass suitably aligned data; confirm
+  const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
+
+  /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
+  while (length > 12)
+  {
+    a += k[0];
+    b += k[1];
+    c += k[2];
+    mix(a,b,c);
+    length -= 12;
+    k += 3;
+  }
+  // Tail: whole words are loaded and then masked, so for lengths that are not a multiple of 4 the load extends up to 3 bytes past the end of key; the masks keep the low-order bits (the leading bytes only on little-endian targets).
+  switch(length)
+  {
+    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+    case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
+    case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
+    case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
+    case 8 : b+=k[1]; a+=k[0]; break;
+    case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
+    case 6 : b+=k[1]&0xffff; a+=k[0]; break;
+    case 5 : b+=k[1]&0xff; a+=k[0]; break;
+    case 4 : a+=k[0]; break;
+    case 3 : a+=k[0]&0xffffff; break;
+    case 2 : a+=k[0]&0xffff; break;
+    case 1 : a+=k[0]&0xff; break;
+    case 0 : { return c; }              /* zero length strings require no mixing */
+  }
+  final(a,b,c);
+
+  return c;
+}
+
+void lookup3_test ( const void * key, int len, uint32_t seed, void * out )
+{ // Adapter with the four-argument hash signature: the 32-bit lookup3 result is stored at out.
+  *static_cast<uint32_t*>(out) = lookup3(key,len,seed);
+}
diff --git a/main.cpp b/main.cpp
new file mode 100644
index 0000000..678ddb2
--- /dev/null
+++ b/main.cpp
@@ -0,0 +1,597 @@
+#include "Platform.h"
+#include "Hashes.h"
+#include "KeysetTest.h"
+#include "SpeedTest.h"
+#include "AvalancheTest.h"
+#include "DifferentialTest.h"
+#include "PMurHash.h"
+
+#include <stdio.h>
+#include <time.h>
+
+//-----------------------------------------------------------------------------
+// Configuration. TODO - move these to command-line flags
+
+bool g_testAll = false;  // turns on every section of test() whose condition includes "|| g_testAll"; main() sets it to true
+
+bool g_testSanity      = false;  // "Sanity Tests"
+bool g_testSpeed       = false;  // "Speed Tests"
+bool g_testDiff        = false;  // "Differential Tests"
+bool g_testDiffDist    = false;  // "Differential Distribution Tests" - not enabled by g_testAll
+bool g_testAvalanche   = false;  // "Avalanche Tests"
+bool g_testBIC         = false;  // "Bit Independence Criteria" - not enabled by g_testAll
+bool g_testCyclic      = false;  // "Keyset 'Cyclic' Tests"
+bool g_testTwoBytes    = false;  // "Keyset 'TwoBytes' Tests"
+bool g_testSparse      = false;  // "Keyset 'Sparse' Tests"
+bool g_testPermutation = false;  // the five "Keyset 'Combination ...' Tests"
+bool g_testWindow      = false;  // "Keyset 'Window' Tests"
+bool g_testText        = false;  // "Keyset 'Text' Tests"
+bool g_testZeroes      = false;  // "Keyset 'Zeroes' Tests"
+bool g_testSeed        = false;  // "Keyset 'Seed' Tests"
+
+//-----------------------------------------------------------------------------
+// This is the list of all hashes that SMHasher can test.
+
+struct HashInfo  // one row of the g_hashes table
+{
+  pfHash hash;            // hash entry point, invoked as hash(key, len, seed, out)
+  int hashbits;           // output width in bits; testHash() accepts 32, 64, 128 and 256
+  uint32_t verification;  // expected value handed to VerificationTest()
+  const char * name;      // lookup key used by findHash()
+  const char * desc;      // human-readable description printed by test()
+};
+
+HashInfo g_hashes[] =  // columns: function, output bits, verification value, lookup name, description
+{
+  { DoNothingHash,        32, 0x00000000, "donothing32", "Do-Nothing function (only valid for measuring call overhead)" },
+  { DoNothingHash,        64, 0x00000000, "donothing64", "Do-Nothing function (only valid for measuring call overhead)" },
+  { DoNothingHash,       128, 0x00000000, "donothing128", "Do-Nothing function (only valid for measuring call overhead)" },
+
+  { crc32,                32, 0x3719DB20, "crc32",       "CRC-32" },
+
+  { md5_32,               32, 0xC10C356B, "md5_32a",     "MD5, first 32 bits of result" },  // note: the lookup name is "md5_32a", not "md5_32"
+  { sha1_32a,             32, 0xF9376EA7, "sha1_32a",    "SHA1, first 32 bits of result" },
+
+  { FNV,                  32, 0xE3CBBE91, "FNV",         "Fowler-Noll-Vo hash, 32-bit" },
+  { Bernstein,            32, 0xBDB4B640, "bernstein",   "Bernstein, 32-bit" },
+  { lookup3_test,         32, 0x3D83917A, "lookup3",     "Bob Jenkins' lookup3" },
+  { SuperFastHash,        32, 0x980ACD1D, "superfast",   "Paul Hsieh's SuperFastHash" },
+  { MurmurOAAT_test,      32, 0x5363BD98, "MurmurOAAT",  "Murmur one-at-a-time" },
+  { Crap8_test,           32, 0x743E97A1, "Crap8",       "Crap8" },
+
+  { CityHash64_test,      64, 0x25A20825, "City64",      "Google CityHash64WithSeed" },
+  { CityHash128_test,    128, 0x6531F54E, "City128",     "Google CityHash128WithSeed" },
+
+  { SpookyHash32_test,    32, 0x3F798BBB, "Spooky32",    "Bob Jenkins' SpookyHash, 32-bit result" },
+  { SpookyHash64_test,    64, 0xA7F955F1, "Spooky64",    "Bob Jenkins' SpookyHash, 64-bit result" },
+  { SpookyHash128_test,  128, 0x8D263080, "Spooky128",   "Bob Jenkins' SpookyHash, 128-bit result" },
+
+  // MurmurHash2
+
+  { MurmurHash2_test,     32, 0x27864C1E, "Murmur2",     "MurmurHash2 for x86, 32-bit" },
+  { MurmurHash2A_test,    32, 0x7FBD4396, "Murmur2A",    "MurmurHash2A for x86, 32-bit" },
+  { MurmurHash64A_test,   64, 0x1F0D3804, "Murmur2B",    "MurmurHash2 for x64, 64-bit" },
+  { MurmurHash64B_test,   64, 0xDD537C05, "Murmur2C",    "MurmurHash2 for x86, 64-bit" },
+
+  // MurmurHash3
+
+  { MurmurHash3_x86_32,   32, 0xB0F57EE3, "Murmur3A",    "MurmurHash3 for x86, 32-bit" },
+  { MurmurHash3_x86_128, 128, 0xB3ECE62A, "Murmur3C",    "MurmurHash3 for x86, 128-bit" },
+  { MurmurHash3_x64_128, 128, 0x6384BA69, "Murmur3F",    "MurmurHash3 for x64, 128-bit" },
+
+  { PMurHash32_test,      32, 0xB0F57EE3, "PMurHash32",  "Shane Day's portable-ized MurmurHash3 for x86, 32-bit." },  // same verification value as the Murmur3A row
+};
+
+HashInfo * findHash ( const char * name )  // returns the g_hashes entry whose name matches, or NULL when there is none
+{
+  for(size_t i = 0; i < sizeof(g_hashes) / sizeof(HashInfo); i++)  // linear scan over the whole table
+  {
+    if(_stricmp(name,g_hashes[i].name) == 0) return &g_hashes[i];  // first match wins; _stricmp is presumably a case-insensitive compare (main() asks for "murmur3a" while the row is named "Murmur3A") - confirm in Platform.h
+  }
+
+  return NULL;
+}
+
+//-----------------------------------------------------------------------------
+// Self-test on startup - verify that all installed hashes work correctly.
+
+void SelfTest ( void )  // checks every g_hashes row with VerificationTest(); on any failure prints a per-hash report and exits with status 1
+{
+  bool pass = true;
+  // First pass: last argument false (presumably "quiet" - confirm against VerificationTest).
+  for(size_t i = 0; i < sizeof(g_hashes) / sizeof(HashInfo); i++)
+  {
+    HashInfo * info = & g_hashes[i];
+
+    pass &= VerificationTest(info->hash,info->hashbits,info->verification,false);
+  }
+
+  if(!pass)
+  {
+    printf("Self-test FAILED!\n");
+    // Second pass: rerun every hash with the last argument true, prefixed by its name.
+    for(size_t i = 0; i < sizeof(g_hashes) / sizeof(HashInfo); i++)
+    {
+      HashInfo * info = & g_hashes[i];
+
+      printf("%16s - ",info->name);
+      pass &= VerificationTest(info->hash,info->hashbits,info->verification,true);  // pass is not read again before exit(1)
+    }
+
+    exit(1);
+  }
+}
+
+//----------------------------------------------------------------------------
+
+template < typename hashtype >  // the size of hashtype fixes the hash width under test (see hashbits below)
+void test ( hashfunc<hashtype> hash, HashInfo * info )  // runs every enabled test section for one hash and prints the results
+{
+  const int hashbits = sizeof(hashtype) * 8;
+  // NOTE: the width used below comes from hashtype, not from info->hashbits; testHash() picks a hashtype that matches.
+  printf("-------------------------------------------------------------------------------\n");
+  printf("--- Testing %s (%s)\n\n",info->name,info->desc);
+
+  //-----------------------------------------------------------------------------
+  // Sanity tests
+
+  if(g_testSanity || g_testAll)
+  {
+    printf("[[[ Sanity Tests ]]]\n\n");
+
+    VerificationTest(hash,hashbits,info->verification,true);
+    SanityTest(hash,hashbits);
+    AppendedZeroesTest(hash,hashbits);
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Speed tests
+
+  if(g_testSpeed || g_testAll)
+  {
+    printf("[[[ Speed Tests ]]]\n\n");
+    // These calls use info->hash rather than the hash argument - NOTE(review): presumably so the VerifyHash wrapper that testHash() passes for 32-bit hashes is not part of the timing; confirm.
+    BulkSpeedTest(info->hash,info->verification);
+    printf("\n");
+
+    for(int i = 1; i < 32; i++)
+    {
+      double cycles;
+      // cycles is handed to TinySpeedTest but never read afterwards
+      TinySpeedTest(hashfunc<hashtype>(info->hash),sizeof(hashtype),i,info->verification,true,cycles);
+    }
+
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Differential tests
+
+  if(g_testDiff || g_testAll)
+  {
+    printf("[[[ Differential Tests ]]]\n\n");
+
+    bool result = true;
+    bool dumpCollisions = false;
+
+    result &= DiffTest< Blob<64>,  hashtype >(hash,5,1000,dumpCollisions);
+    result &= DiffTest< Blob<128>, hashtype >(hash,4,1000,dumpCollisions);
+    result &= DiffTest< Blob<256>, hashtype >(hash,3,1000,dumpCollisions);
+
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Differential-distribution tests
+
+  if(g_testDiffDist /*|| g_testAll*/)
+  {
+    printf("[[[ Differential Distribution Tests ]]]\n\n");
+
+    bool result = true;
+    // NOTE: result is accumulated but never checked, so this section cannot print FAIL.
+    result &= DiffDistTest2<uint64_t,hashtype>(hash);
+
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Avalanche tests
+
+  if(g_testAvalanche || g_testAll)
+  {
+    printf("[[[ Avalanche Tests ]]]\n\n");
+
+    bool result = true;
+    // Key widths from 32 to 152 bits in 8-bit steps, each called with 300000 as the second argument.
+    result &= AvalancheTest< Blob< 32>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob< 40>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob< 48>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob< 56>, hashtype > (hash,300000);
+
+    result &= AvalancheTest< Blob< 64>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob< 72>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob< 80>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob< 88>, hashtype > (hash,300000);
+
+    result &= AvalancheTest< Blob< 96>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob<104>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob<112>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob<120>, hashtype > (hash,300000);
+
+    result &= AvalancheTest< Blob<128>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob<136>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob<144>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob<152>, hashtype > (hash,300000);
+
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Bit Independence Criteria. Interesting, but doesn't tell us much about
+  // collision or distribution.
+
+  if(g_testBIC)
+  {
+    printf("[[[ Bit Independence Criteria ]]]\n\n");
+
+    bool result = true;
+    // NOTE: the BicTest3 call below does not store anything in result, so the FAIL line in this section can never fire.
+    //result &= BicTest<uint64_t,hashtype>(hash,2000000);
+    BicTest3<Blob<88>,hashtype>(hash,2000000);
+
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Keyset 'Cyclic' - keys of the form "abcdabcdabcd..."
+
+  if(g_testCyclic || g_testAll)
+  {
+    printf("[[[ Keyset 'Cyclic' Tests ]]]\n\n");
+
+    bool result = true;
+    bool drawDiagram = false;
+
+    result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+0,8,10000000,drawDiagram);
+    result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+1,8,10000000,drawDiagram);
+    result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+2,8,10000000,drawDiagram);
+    result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+3,8,10000000,drawDiagram);
+    result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+4,8,10000000,drawDiagram);
+
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Keyset 'TwoBytes' - all keys up to N bytes containing two non-zero bytes
+
+  // This generates some huge keysets, 128-bit tests will take ~1.3 gigs of RAM.
+
+  if(g_testTwoBytes || g_testAll)
+  {
+    printf("[[[ Keyset 'TwoBytes' Tests ]]]\n\n");
+
+    bool result = true;
+    bool drawDiagram = false;
+
+    for(int i = 4; i <= 20; i += 4)
+    {
+      result &= TwoBytesTest2<hashtype>(hash,i,drawDiagram);
+    }
+
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Keyset 'Sparse' - keys with all bits 0 except a few
+
+  if(g_testSparse || g_testAll)
+  {
+    printf("[[[ Keyset 'Sparse' Tests ]]]\n\n");
+
+    bool result = true;
+    bool drawDiagram = false;
+
+    result &= SparseKeyTest<  32,hashtype>(hash,6,true,true,true,drawDiagram);
+    result &= SparseKeyTest<  40,hashtype>(hash,6,true,true,true,drawDiagram);
+    result &= SparseKeyTest<  48,hashtype>(hash,5,true,true,true,drawDiagram);
+    result &= SparseKeyTest<  56,hashtype>(hash,5,true,true,true,drawDiagram);
+    result &= SparseKeyTest<  64,hashtype>(hash,5,true,true,true,drawDiagram);
+    result &= SparseKeyTest<  96,hashtype>(hash,4,true,true,true,drawDiagram);
+    result &= SparseKeyTest< 256,hashtype>(hash,3,true,true,true,drawDiagram);
+    result &= SparseKeyTest<2048,hashtype>(hash,2,true,true,true,drawDiagram);
+
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Keyset 'Permutation' - all possible combinations of a set of blocks
+
+  if(g_testPermutation || g_testAll)
+  {
+    {
+      // This one breaks lookup3, surprisingly
+
+      printf("[[[ Keyset 'Combination Lowbits' Tests ]]]\n\n");
+
+      bool result = true;
+      bool drawDiagram = false;
+
+      uint32_t blocks[] =
+      {
+        0x00000000,
+
+        0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007,
+      };
+
+      result &= CombinationKeyTest<hashtype>(hash,8,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+
+      if(!result) printf("*********FAIL*********\n");
+      printf("\n");
+    }
+
+    {
+      printf("[[[ Keyset 'Combination Highbits' Tests ]]]\n\n");
+
+      bool result = true;
+      bool drawDiagram = false;
+
+      uint32_t blocks[] =
+      {
+        0x00000000,
+
+        0x20000000, 0x40000000, 0x60000000, 0x80000000, 0xA0000000, 0xC0000000, 0xE0000000
+      };
+
+      result &= CombinationKeyTest<hashtype>(hash,8,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+
+      if(!result) printf("*********FAIL*********\n");
+      printf("\n");
+    }
+
+    {
+      printf("[[[ Keyset 'Combination 0x8000000' Tests ]]]\n\n");
+      // NOTE(review): the heading above prints 0x8000000 (seven hex digits) while the block value below is 0x80000000 - looks like a label typo.
+      bool result = true;
+      bool drawDiagram = false;
+
+      uint32_t blocks[] =
+      {
+        0x00000000,
+
+        0x80000000,
+      };
+
+      result &= CombinationKeyTest<hashtype>(hash,20,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+
+      if(!result) printf("*********FAIL*********\n");
+      printf("\n");
+    }
+
+    {
+      printf("[[[ Keyset 'Combination 0x0000001' Tests ]]]\n\n");
+      // NOTE(review): the heading above prints 0x0000001 (seven hex digits) while the block value below is 0x00000001.
+      bool result = true;
+      bool drawDiagram = false;
+
+      uint32_t blocks[] =
+      {
+        0x00000000,
+
+        0x00000001,
+      };
+
+      result &= CombinationKeyTest<hashtype>(hash,20,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+
+      if(!result) printf("*********FAIL*********\n");
+      printf("\n");
+    }
+
+    {
+      printf("[[[ Keyset 'Combination Hi-Lo' Tests ]]]\n\n");
+
+      bool result = true;
+      bool drawDiagram = false;
+
+      uint32_t blocks[] =
+      {
+        0x00000000,
+
+        0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007,
+
+        0x80000000, 0x40000000, 0xC0000000, 0x20000000, 0xA0000000, 0x60000000, 0xE0000000
+      };
+
+      result &= CombinationKeyTest<hashtype>(hash,6,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+
+      if(!result) printf("*********FAIL*********\n");
+      printf("\n");
+    }
+  }
+
+  //-----------------------------------------------------------------------------
+  // Keyset 'Window'
+
+  // Skip distribution test for these - they're too easy to distribute well,
+  // and it generates a _lot_ of testing
+
+  if(g_testWindow || g_testAll)
+  {
+    printf("[[[ Keyset 'Window' Tests ]]]\n\n");
+
+    bool result = true;
+    bool testCollision = true;
+    bool testDistribution = false;
+    bool drawDiagram = false;
+    // The key type is a Blob twice as wide as the hash output.
+    result &= WindowedKeyTest< Blob<hashbits*2>, hashtype > ( hash, 20, testCollision, testDistribution, drawDiagram );
+
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Keyset 'Text'
+
+  if(g_testText || g_testAll)
+  {
+    printf("[[[ Keyset 'Text' Tests ]]]\n\n");
+
+    bool result = true;
+    bool drawDiagram = false;
+
+    const char * alnum = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
+
+    result &= TextKeyTest( hash, "Foo",    alnum,4, "Bar",    drawDiagram );
+    result &= TextKeyTest( hash, "FooBar", alnum,4, "",       drawDiagram );
+    result &= TextKeyTest( hash, "",       alnum,4, "FooBar", drawDiagram );
+
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Keyset 'Zeroes'
+
+  if(g_testZeroes || g_testAll)
+  {
+    printf("[[[ Keyset 'Zeroes' Tests ]]]\n\n");
+
+    bool result = true;
+    bool drawDiagram = false;
+
+    result &= ZeroKeyTest<hashtype>( hash, drawDiagram );
+
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Keyset 'Seed'
+
+  if(g_testSeed || g_testAll)
+  {
+    printf("[[[ Keyset 'Seed' Tests ]]]\n\n");
+
+    bool result = true;
+    bool drawDiagram = false;
+
+    result &= SeedTest<hashtype>( hash, 1000000, drawDiagram );
+
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
+}
+
+//-----------------------------------------------------------------------------
+
+uint32_t g_inputVCode = 1;   // running MurmurOAAT code over every key and seed seen by VerifyHash()
+uint32_t g_outputVCode = 1;  // running MurmurOAAT code over every hash output seen by VerifyHash()
+uint32_t g_resultVCode = 1;  // printed by main(); nothing in this file writes to it
+
+HashInfo * g_hashUnderTest = NULL;  // set by testHash(); dereferenced by VerifyHash()
+
+void VerifyHash ( const void * key, int len, uint32_t seed, void * out )  // calls g_hashUnderTest->hash and folds its inputs and output into the running vcodes
+{
+  g_inputVCode = MurmurOAAT(key,len,g_inputVCode);
+  g_inputVCode = MurmurOAAT(&seed,sizeof(uint32_t),g_inputVCode);
+  // g_hashUnderTest must be non-NULL here; testHash() sets it before handing this wrapper to test().
+  g_hashUnderTest->hash(key,len,seed,out);
+  // hashbits/8 is the number of output bytes folded into the output vcode.
+  g_outputVCode = MurmurOAAT(out,g_hashUnderTest->hashbits/8,g_outputVCode);
+}
+
+//-----------------------------------------------------------------------------
+
+void testHash ( const char * name )
+{
+  HashInfo * pInfo = findHash(name);
+
+  if(pInfo == NULL)
+  {
+    printf("Invalid hash '%s' specified\n",name);
+    return;
+  }
+  else
+  {
+    g_hashUnderTest = pInfo;
+
+    if(pInfo->hashbits == 32)
+    {
+      test<uint32_t>( VerifyHash, pInfo );
+    }
+    else if(pInfo->hashbits == 64)
+    {
+      test<uint64_t>( pInfo->hash, pInfo );
+    }
+    else if(pInfo->hashbits == 128)
+    {
+      test<uint128_t>( pInfo->hash, pInfo );
+    }
+    else if(pInfo->hashbits == 256)
+    {
+      test<uint256_t>( pInfo->hash, pInfo );
+    }
+    else
+    {
+      printf("Invalid hash bit width %d for hash '%s'",pInfo->hashbits,pInfo->name);
+    }
+  }
+}
+//-----------------------------------------------------------------------------
+
+int main ( int argc, char ** argv )  // argv[1], when present, names the hash to test; any further arguments are ignored
+{
+  const char * hashToTest = "murmur3a";
+  // The default is spelled "murmur3a" while the table row is "Murmur3A"; this relies on findHash() matching names case-insensitively (presumably what _stricmp does).
+  if(argc < 2)
+  {
+    printf("(No test hash given on command line, testing Murmur3_x86_32.)\n");
+  }
+  else
+  {
+    hashToTest = argv[1];
+  }
+
+  // Code runs on the 3rd CPU by default
+
+  SetAffinity((1 << 2));
+
+  SelfTest();
+  // The timer starts after SelfTest(), so self-test time is not part of the reported duration.
+  int timeBegin = clock();
+
+  g_testAll = true;
+  // With g_testAll forced on, the individual switches below only matter for sections that ignore it (g_testBIC, g_testDiffDist).
+  //g_testSanity = true;
+  //g_testSpeed = true;
+  //g_testAvalanche = true;
+  //g_testBIC = true;
+  //g_testCyclic = true;
+  //g_testTwoBytes = true;
+  //g_testDiff = true;
+  //g_testDiffDist = true;
+  //g_testSparse = true;
+  //g_testPermutation = true;
+  //g_testWindow = true;
+  //g_testZeroes = true;
+
+  testHash(hashToTest);
+
+  //----------
+
+  int timeEnd = clock();
+
+  printf("\n");
+  printf("Input vcode 0x%08x, Output vcode 0x%08x, Result vcode 0x%08x\n",g_inputVCode,g_outputVCode,g_resultVCode);
+  printf("Verification value is 0x%08x - Testing took %f seconds\n",g_verify,double(timeEnd-timeBegin)/double(CLOCKS_PER_SEC));  // g_verify is not declared in this file; presumably it comes from one of the included headers
+  printf("-------------------------------------------------------------------------------\n");
+  return 0;
+}
diff --git a/md5.cpp b/md5.cpp
new file mode 100644
index 0000000..8e50c79
--- /dev/null
+++ b/md5.cpp
@@ -0,0 +1,382 @@
+#include <memory.h>
+#include "Types.h"
+
+// "Derived from the RSA Data Security, Inc. MD5 Message Digest Algorithm"
+
+/**
+ * \brief          MD5 context structure
+ */
+typedef struct  /* NOTE: counters and state are unsigned long, which may be wider than 32 bits; S() and md5_update() mask with 0xFFFFFFFF */
+{
+    unsigned long total[2];     /*!< number of bytes processed  */
+    unsigned long state[4];     /*!< intermediate digest state  */
+    unsigned char buffer[64];   /*!< data block being processed */
+    /* ipad/opad are not touched by any function defined in this file (the md5_hmac_* functions are only declared) */
+    unsigned char ipad[64];     /*!< HMAC: inner padding        */
+    unsigned char opad[64];     /*!< HMAC: outer padding        */
+}
+md5_context;
+
+/**
+ * \brief          MD5 context setup
+ *
+ * \param ctx      context to be initialized
+ */
+void md5_starts( md5_context *ctx );
+
+/**
+ * \brief          MD5 process buffer
+ *
+ * \param ctx      MD5 context
+ * \param input    buffer holding the  data
+ * \param ilen     length of the input data
+ */
+void md5_update( md5_context *ctx, unsigned char *input, int ilen );  /* the definition below returns immediately when ilen <= 0 */
+
+/**
+ * \brief          MD5 final digest
+ *
+ * \param ctx      MD5 context
+ * \param output   MD5 checksum result
+ */
+void md5_finish( md5_context *ctx, unsigned char output[16] );
+
+/**
+ * \brief          Output = MD5( input buffer )
+ *
+ * \param input    buffer holding the  data
+ * \param ilen     length of the input data
+ * \param output   MD5 checksum result
+ */
+void md5( unsigned char *input, int ilen, unsigned char output[16] );
+
+/**
+ * \brief          Output = MD5( file contents )
+ *
+ * \param path     input file name
+ * \param output   MD5 checksum result
+ *
+ * \return         0 if successful, 1 if fopen failed,
+ *                 or 2 if fread failed
+ */
+int md5_file( char *path, unsigned char output[16] );  /* declared only; no definition in this file */
+
+/**
+ * \brief          MD5 HMAC context setup
+ *
+ * \param ctx      HMAC context to be initialized
+ * \param key      HMAC secret key
+ * \param keylen   length of the HMAC key
+ */
+void md5_hmac_starts( md5_context *ctx, unsigned char *key, int keylen );  /* declared only; no definition in this file */
+
+/**
+ * \brief          MD5 HMAC process buffer
+ *
+ * \param ctx      HMAC context
+ * \param input    buffer holding the  data
+ * \param ilen     length of the input data
+ */
+void md5_hmac_update( md5_context *ctx, unsigned char *input, int ilen );  /* declared only; no definition in this file */
+
+/**
+ * \brief          MD5 HMAC final digest
+ *
+ * \param ctx      HMAC context
+ * \param output   MD5 HMAC checksum result
+ */
+void md5_hmac_finish( md5_context *ctx, unsigned char output[16] );  /* declared only; no definition in this file */
+
+/**
+ * \brief          Output = HMAC-MD5( hmac key, input buffer )
+ *
+ * \param key      HMAC secret key
+ * \param keylen   length of the HMAC key
+ * \param input    buffer holding the  data
+ * \param ilen     length of the input data
+ * \param output   HMAC-MD5 result
+ */
+void md5_hmac( unsigned char *key, int keylen,
+               unsigned char *input, int ilen,
+               unsigned char output[16] );  /* declared only; no definition in this file */
+
+/**
+ * \brief          Checkup routine
+ *
+ * \return         0 if successful, or 1 if the test failed
+ */
+int md5_self_test( int verbose );  /* declared only; no definition in this file */
+
+/*
+ * 32-bit integer manipulation macros (little endian)
+ */
+#ifndef GET_ULONG_LE  /* GET_ULONG_LE(n,b,i): n = the four bytes b[i..i+3], least-significant byte first */
+#define GET_ULONG_LE(n,b,i)                             \
+{                                                       \
+    (n) = ( (unsigned long) (b)[(i)    ]       )        \
+        | ( (unsigned long) (b)[(i) + 1] <<  8 )        \
+        | ( (unsigned long) (b)[(i) + 2] << 16 )        \
+        | ( (unsigned long) (b)[(i) + 3] << 24 );       \
+}
+#endif
+
+#ifndef PUT_ULONG_LE  /* PUT_ULONG_LE(n,b,i): stores the low 32 bits of n into b[i..i+3], least-significant byte first; higher bits of n are dropped by the casts */
+#define PUT_ULONG_LE(n,b,i)                             \
+{                                                       \
+    (b)[(i)    ] = (unsigned char) ( (n)       );       \
+    (b)[(i) + 1] = (unsigned char) ( (n) >>  8 );       \
+    (b)[(i) + 2] = (unsigned char) ( (n) >> 16 );       \
+    (b)[(i) + 3] = (unsigned char) ( (n) >> 24 );       \
+}
+#endif
+
+/*
+ * MD5 context setup
+ */
+void md5_starts( md5_context *ctx )  /* resets the byte counters and loads the four initial state words; buffer, ipad and opad are left as they are */
+{
+    ctx->total[0] = 0;
+    ctx->total[1] = 0;
+
+    ctx->state[0] = 0x67452301;
+    ctx->state[1] = 0xEFCDAB89;
+    ctx->state[2] = 0x98BADCFE;
+    ctx->state[3] = 0x10325476;
+}
+
+static void md5_process( md5_context *ctx, unsigned char data[64] )  /* folds one 64-byte block into ctx->state; data is only read */
+{
+    unsigned long X[16], A, B, C, D;
+    /* decode the block into sixteen words, least-significant byte first */
+    GET_ULONG_LE( X[ 0], data,  0 );
+    GET_ULONG_LE( X[ 1], data,  4 );
+    GET_ULONG_LE( X[ 2], data,  8 );
+    GET_ULONG_LE( X[ 3], data, 12 );
+    GET_ULONG_LE( X[ 4], data, 16 );
+    GET_ULONG_LE( X[ 5], data, 20 );
+    GET_ULONG_LE( X[ 6], data, 24 );
+    GET_ULONG_LE( X[ 7], data, 28 );
+    GET_ULONG_LE( X[ 8], data, 32 );
+    GET_ULONG_LE( X[ 9], data, 36 );
+    GET_ULONG_LE( X[10], data, 40 );
+    GET_ULONG_LE( X[11], data, 44 );
+    GET_ULONG_LE( X[12], data, 48 );
+    GET_ULONG_LE( X[13], data, 52 );
+    GET_ULONG_LE( X[14], data, 56 );
+    GET_ULONG_LE( X[15], data, 60 );
+    /* S: rotate left by n within 32 bits. Only the right-shift operand is masked; with a wider unsigned long the left shift can leave bits above bit 31 (NOTE(review): these appear harmless because PUT_ULONG_LE keeps only the low 32 bits - confirm) */
+#define S(x,n) ((x << n) | ((x & 0xFFFFFFFF) >> (32 - n)))
+    /* P: one step; F is redefined before each of the four 16-step groups below */
+#define P(a,b,c,d,k,s,t)                                \
+{                                                       \
+    a += F(b,c,d) + X[k] + t; a = S(a,s) + b;           \
+}
+
+    A = ctx->state[0];
+    B = ctx->state[1];
+    C = ctx->state[2];
+    D = ctx->state[3];
+    /* group 1 of 4 */
+#define F(x,y,z) (z ^ (x & (y ^ z)))
+
+    P( A, B, C, D,  0,  7, 0xD76AA478 );
+    P( D, A, B, C,  1, 12, 0xE8C7B756 );
+    P( C, D, A, B,  2, 17, 0x242070DB );
+    P( B, C, D, A,  3, 22, 0xC1BDCEEE );
+    P( A, B, C, D,  4,  7, 0xF57C0FAF );
+    P( D, A, B, C,  5, 12, 0x4787C62A );
+    P( C, D, A, B,  6, 17, 0xA8304613 );
+    P( B, C, D, A,  7, 22, 0xFD469501 );
+    P( A, B, C, D,  8,  7, 0x698098D8 );
+    P( D, A, B, C,  9, 12, 0x8B44F7AF );
+    P( C, D, A, B, 10, 17, 0xFFFF5BB1 );
+    P( B, C, D, A, 11, 22, 0x895CD7BE );
+    P( A, B, C, D, 12,  7, 0x6B901122 );
+    P( D, A, B, C, 13, 12, 0xFD987193 );
+    P( C, D, A, B, 14, 17, 0xA679438E );
+    P( B, C, D, A, 15, 22, 0x49B40821 );
+
+#undef F
+    /* group 2 of 4 */
+#define F(x,y,z) (y ^ (z & (x ^ y)))
+
+    P( A, B, C, D,  1,  5, 0xF61E2562 );
+    P( D, A, B, C,  6,  9, 0xC040B340 );
+    P( C, D, A, B, 11, 14, 0x265E5A51 );
+    P( B, C, D, A,  0, 20, 0xE9B6C7AA );
+    P( A, B, C, D,  5,  5, 0xD62F105D );
+    P( D, A, B, C, 10,  9, 0x02441453 );
+    P( C, D, A, B, 15, 14, 0xD8A1E681 );
+    P( B, C, D, A,  4, 20, 0xE7D3FBC8 );
+    P( A, B, C, D,  9,  5, 0x21E1CDE6 );
+    P( D, A, B, C, 14,  9, 0xC33707D6 );
+    P( C, D, A, B,  3, 14, 0xF4D50D87 );
+    P( B, C, D, A,  8, 20, 0x455A14ED );
+    P( A, B, C, D, 13,  5, 0xA9E3E905 );
+    P( D, A, B, C,  2,  9, 0xFCEFA3F8 );
+    P( C, D, A, B,  7, 14, 0x676F02D9 );
+    P( B, C, D, A, 12, 20, 0x8D2A4C8A );
+
+#undef F
+    /* group 3 of 4 */
+#define F(x,y,z) (x ^ y ^ z)
+
+    P( A, B, C, D,  5,  4, 0xFFFA3942 );
+    P( D, A, B, C,  8, 11, 0x8771F681 );
+    P( C, D, A, B, 11, 16, 0x6D9D6122 );
+    P( B, C, D, A, 14, 23, 0xFDE5380C );
+    P( A, B, C, D,  1,  4, 0xA4BEEA44 );
+    P( D, A, B, C,  4, 11, 0x4BDECFA9 );
+    P( C, D, A, B,  7, 16, 0xF6BB4B60 );
+    P( B, C, D, A, 10, 23, 0xBEBFBC70 );
+    P( A, B, C, D, 13,  4, 0x289B7EC6 );
+    P( D, A, B, C,  0, 11, 0xEAA127FA );
+    P( C, D, A, B,  3, 16, 0xD4EF3085 );
+    P( B, C, D, A,  6, 23, 0x04881D05 );
+    P( A, B, C, D,  9,  4, 0xD9D4D039 );
+    P( D, A, B, C, 12, 11, 0xE6DB99E5 );
+    P( C, D, A, B, 15, 16, 0x1FA27CF8 );
+    P( B, C, D, A,  2, 23, 0xC4AC5665 );
+
+#undef F
+    /* group 4 of 4 */
+#define F(x,y,z) (y ^ (x | ~z))
+
+    P( A, B, C, D,  0,  6, 0xF4292244 );
+    P( D, A, B, C,  7, 10, 0x432AFF97 );
+    P( C, D, A, B, 14, 15, 0xAB9423A7 );
+    P( B, C, D, A,  5, 21, 0xFC93A039 );
+    P( A, B, C, D, 12,  6, 0x655B59C3 );
+    P( D, A, B, C,  3, 10, 0x8F0CCC92 );
+    P( C, D, A, B, 10, 15, 0xFFEFF47D );
+    P( B, C, D, A,  1, 21, 0x85845DD1 );
+    P( A, B, C, D,  8,  6, 0x6FA87E4F );
+    P( D, A, B, C, 15, 10, 0xFE2CE6E0 );
+    P( C, D, A, B,  6, 15, 0xA3014314 );
+    P( B, C, D, A, 13, 21, 0x4E0811A1 );
+    P( A, B, C, D,  4,  6, 0xF7537E82 );
+    P( D, A, B, C, 11, 10, 0xBD3AF235 );
+    P( C, D, A, B,  2, 15, 0x2AD7D2BB );
+    P( B, C, D, A,  9, 21, 0xEB86D391 );
+
+#undef F
+    /* add this block's result into the running state; S and P stay defined after this function */
+    ctx->state[0] += A;
+    ctx->state[1] += B;
+    ctx->state[2] += C;
+    ctx->state[3] += D;
+}
+
+/*
+ * MD5 process buffer
+ */
+void md5_update( md5_context *ctx, unsigned char *input, int ilen )  /* feeds ilen bytes of input into the running hash; ilen <= 0 is ignored */
+{
+    int fill;
+    unsigned long left;
+
+    if( ilen <= 0 )
+        return;
+    /* left = bytes already waiting in ctx->buffer, fill = bytes needed to complete that 64-byte block */
+    left = ctx->total[0] & 0x3F;
+    fill = 64 - left;
+    /* the byte count is kept in two 32-bit halves: total[0] wraps at 2^32 and carries into total[1] */
+    ctx->total[0] += ilen;
+    ctx->total[0] &= 0xFFFFFFFF;
+
+    if( ctx->total[0] < (unsigned long) ilen )
+        ctx->total[1]++;
+    /* complete and process a partially filled buffer first */
+    if( left && ilen >= fill )
+    {
+        memcpy( (void *) (ctx->buffer + left),
+                (void *) input, fill );
+        md5_process( ctx, ctx->buffer );
+        input += fill;
+        ilen  -= fill;
+        left = 0;
+    }
+    /* then process whole 64-byte blocks straight from the caller's buffer */
+    while( ilen >= 64 )
+    {
+        md5_process( ctx, input );
+        input += 64;
+        ilen  -= 64;
+    }
+    /* stash the remainder (fewer than 64 bytes) for the next call */
+    if( ilen > 0 )
+    {
+        memcpy( (void *) (ctx->buffer + left),
+                (void *) input, ilen );
+    }
+}
+
+static const unsigned char md5_padding[64] =  /* 0x80 followed by 63 zero bytes; md5_finish() feeds a prefix of this to md5_update() */
+{
+ 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/*
+ * MD5 final digest
+ */
+void md5_finish( md5_context *ctx, unsigned char output[16] )  /* appends padding and the message length, then writes the 16-byte digest to output */
+{
+    unsigned long last, padn;
+    unsigned long high, low;
+    unsigned char msglen[8];
+    /* message length in bits (byte count << 3), split into two halves; captured before the padding below changes ctx->total */
+    high = ( ctx->total[0] >> 29 )
+         | ( ctx->total[1] <<  3 );
+    low  = ( ctx->total[0] <<  3 );
+
+    PUT_ULONG_LE( low,  msglen, 0 );
+    PUT_ULONG_LE( high, msglen, 4 );
+    /* pad so that, once the 8 length bytes are appended, the total is a multiple of 64 bytes */
+    last = ctx->total[0] & 0x3F;
+    padn = ( last < 56 ) ? ( 56 - last ) : ( 120 - last );
+
+    md5_update( ctx, (unsigned char *) md5_padding, padn );  /* the cast drops const; md5_update only reads from input */
+    md5_update( ctx, msglen, 8 );
+    /* write the four state words out least-significant byte first */
+    PUT_ULONG_LE( ctx->state[0], output,  0 );
+    PUT_ULONG_LE( ctx->state[1], output,  4 );
+    PUT_ULONG_LE( ctx->state[2], output,  8 );
+    PUT_ULONG_LE( ctx->state[3], output, 12 );
+}
+
+/*
+ * output = MD5( input buffer )
+ */
+void md5( unsigned char *input, int ilen, unsigned char output[16] )  /* one-shot digest: starts, updates and finishes a local context */
+{
+    md5_context ctx;
+
+    md5_starts( &ctx );
+    md5_update( &ctx, input, ilen );  /* no-op when ilen <= 0, which leaves the digest of the empty message */
+    md5_finish( &ctx, output );
+
+    memset( &ctx, 0, sizeof( md5_context ) );  /* zero the local context before returning */
+}
+
+unsigned int md5hash ( const void * input, int len, unsigned int /*seed*/ )  // returns hash[0], the first word of the MD5 digest of input; the seed argument is ignored
+{
+  unsigned int hash[4];  // receives the 16-byte digest - assumes unsigned int is 4 bytes (TODO confirm for all targets)
+
+  md5((unsigned char *)input,len,(unsigned char *)hash);  // the cast drops const from input
+
+  //return hash[0] ^ hash[1] ^ hash[2] ^ hash[3];
+
+  return hash[0];
+}	
+
+void md5_32            ( const void * key, int len, uint32_t /*seed*/, void * out )  // (key, len, seed, out) adapter: stores hash[0] of the MD5 digest into *out; the seed is ignored
+{
+  unsigned int hash[4];  // receives the 16-byte digest - assumes unsigned int is 4 bytes (TODO confirm for all targets)
+
+  md5((unsigned char*)key,len,(unsigned char*)hash);  // the cast drops const from key
+
+  *(uint32_t*)out = hash[0];  // out must point at storage for one uint32_t
+}
\ No newline at end of file
diff --git a/pstdint.h b/pstdint.h
new file mode 100644
index 0000000..43dce62
--- /dev/null
+++ b/pstdint.h
@@ -0,0 +1,799 @@
+/*  A portable stdint.h
+ ****************************************************************************
+ *  BSD License:
+ ****************************************************************************
+ *
+ *  Copyright (c) 2005-2007 Paul Hsieh
+ *  All rights reserved.
+ *  
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *  
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. The name of the author may not be used to endorse or promote products
+ *     derived from this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ ****************************************************************************
+ *
+ *  Version 0.1.11
+ *
+ *  The ANSI C standard committee, for the C99 standard, specified the
+ *  inclusion of a new standard include file called stdint.h.  This is
+ *  a very useful and long desired include file which contains several
+ *  very precise definitions for integer scalar types that are
+ *  critically important for making portable several classes of
+ *  applications including cryptography, hashing, variable length
+ *  integer libraries and so on.  But for most developers it's likely
+ *  useful just for programming sanity.
+ *
+ *  The problem is that most compiler vendors have decided not to
+ *  implement the C99 standard, and the next C++ language standard
+ *  (which has a lot more mindshare these days) will be a long time in
+ *  coming and it's unknown whether or not it will include stdint.h or
+ *  how much adoption it will have.  Either way, it will be a long time
+ *  before all compilers come with a stdint.h and it also does nothing
+ *  for the extremely large number of compilers available today which
+ *  do not include this file, or anything comparable to it.
+ *
+ *  So that's what this file is all about.  It's an attempt to build a
+ *  single universal include file that works on as many platforms as
+ *  possible to deliver what stdint.h is supposed to.  A few things
+ *  that should be noted about this file:
+ *
+ *    1) It is not guaranteed to be portable and/or present an identical
+ *       interface on all platforms.  The extreme variability of the
+ *       ANSI C standard makes this an impossibility right from the
+ *       very get go. It's really only meant to be useful for the vast
+ *       majority of platforms that possess the capability of
+ *       implementing usefully and precisely defined, standard sized
+ *       integer scalars.  Systems which are not intrinsically 2s
+ *       complement may produce invalid constants.
+ *
+ *    2) There is an unavoidable use of non-reserved symbols.
+ *
+ *    3) Other standard include files are invoked.
+ *
+ *    4) This file may come in conflict with future platforms that do
+ *       include stdint.h.  The hope is that one or the other can be
+ *       used with no real difference.
+ *
+ *    5) In the current version, if your platform can't represent
+ *       int32_t, int16_t and int8_t, it just dumps out with a compiler
+ *       error.
+ *
+ *    6) 64 bit integers may or may not be defined.  Test for their
+ *       presence with the test: #ifdef INT64_MAX or #ifdef UINT64_MAX.
+ *       Note that this is different from the C99 specification which
+ *       requires the existence of 64 bit support in the compiler.  If
+ *       this is not defined for your platform, yet it is capable of
+ *       dealing with 64 bits then it is because this file has not yet
+ *       been extended to cover all of your system's capabilities.
+ *
+ *    7) (u)intptr_t may or may not be defined.  Test for its presence
+ *       with the test: #ifdef PTRDIFF_MAX.  If this is not defined
+ *       for your platform, then it is because this file has not yet
+ *       been extended to cover all of your system's capabilities, not
+ *       because it's optional.
+ *
+ *    8) The following might not be defined even if your platform is
+ *       capable of defining it:
+ *
+ *       WCHAR_MIN
+ *       WCHAR_MAX
+ *       (u)int64_t
+ *       PTRDIFF_MIN
+ *       PTRDIFF_MAX
+ *       (u)intptr_t
+ *
+ *    9) The following have not been defined:
+ *
+ *       WINT_MIN
+ *       WINT_MAX
+ *
+ *   10) The criteria for defining (u)int_least(*)_t isn't clear,
+ *       except for systems which don't have a type that precisely
+ *       defined 8, 16, or 32 bit types (which this include file does
+ *       not support anyways). Default definitions have been given.
+ *
+ *   11) The criteria for defining (u)int_fast(*)_t isn't something I
+ *       would trust to any particular compiler vendor or the ANSI C
+ *       committee.  It is well known that "compatible systems" are
+ *       commonly created that have very different performance
+ *       characteristics from the systems they are compatible with,
+ *       especially those whose vendors make both the compiler and the
+ *       system.  Default definitions have been given, but it's strongly
+ *       recommended that users never use these definitions for any
+ *       reason (they do *NOT* deliver any serious guarantee of
+ *       improved performance -- not in this file, nor any vendor's
+ *       stdint.h).
+ *
+ *   12) The following macros:
+ *
+ *       PRINTF_INTMAX_MODIFIER
+ *       PRINTF_INT64_MODIFIER
+ *       PRINTF_INT32_MODIFIER
+ *       PRINTF_INT16_MODIFIER
+ *       PRINTF_LEAST64_MODIFIER
+ *       PRINTF_LEAST32_MODIFIER
+ *       PRINTF_LEAST16_MODIFIER
+ *       PRINTF_INTPTR_MODIFIER
+ *
+ *       are strings which have been defined as the modifiers required
+ *       for the "d", "u" and "x" printf formats to correctly output
+ *       (u)intmax_t, (u)int64_t, (u)int32_t, (u)int16_t, (u)least64_t,
+ *       (u)least32_t, (u)least16_t and (u)intptr_t types respectively.
+ *       PRINTF_INTPTR_MODIFIER is not defined for some systems which
+ *       provide their own stdint.h.  PRINTF_INT64_MODIFIER is not
+ *       defined if INT64_MAX is not defined.  These are an extension
+ *       beyond what C99 specifies must be in stdint.h.
+ *
+ *       In addition, the following macros are defined:
+ *
+ *       PRINTF_INTMAX_HEX_WIDTH
+ *       PRINTF_INT64_HEX_WIDTH
+ *       PRINTF_INT32_HEX_WIDTH
+ *       PRINTF_INT16_HEX_WIDTH
+ *       PRINTF_INT8_HEX_WIDTH
+ *       PRINTF_INTMAX_DEC_WIDTH
+ *       PRINTF_INT64_DEC_WIDTH
+ *       PRINTF_INT32_DEC_WIDTH
+ *       PRINTF_INT16_DEC_WIDTH
+ *       PRINTF_INT8_DEC_WIDTH
+ *
+ *       Which specifies the maximum number of characters required to
+ *       print the number of that type in either hexadecimal or decimal.
+ *       These are an extension beyond what C99 specifies must be in
+ *       stdint.h.
+ *
+ *  Compilers tested (all with 0 warnings at their highest respective
+ *  settings): Borland Turbo C 2.0, WATCOM C/C++ 11.0 (16 bits and 32
+ *  bits), Microsoft Visual C++ 6.0 (32 bit), Microsoft Visual Studio
+ *  .net (VC7), Intel C++ 4.0, GNU gcc v3.3.3
+ *
+ *  This file should be considered a work in progress.  Suggestions for
+ *  improvements, especially those which increase coverage are strongly
+ *  encouraged.
+ *
+ *  Acknowledgements
+ *
+ *  The following people have made significant contributions to the
+ *  development and testing of this file:
+ *
+ *  Chris Howie
+ *  John Steele Scott
+ *  Dave Thorup
+ *
+ */
+
+#include <stddef.h>
+#include <limits.h>
+#include <signal.h>
+
+/*
+ *  For gcc with _STDINT_H, fill in the PRINTF_INT*_MODIFIER macros, and
+ *  do nothing else.  On the Mac OS X version of gcc this is _STDINT_H_.
+ */
+
+#if ((defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) || (defined (__WATCOMC__) && (defined (_STDINT_H_INCLUDED) || __WATCOMC__ >= 1250)) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_)) )) && !defined (_PSTDINT_H_INCLUDED)
+#include <stdint.h>
+#define _PSTDINT_H_INCLUDED
+# ifndef PRINTF_INT64_MODIFIER /* printf helper defaults for the case where the system stdint.h supplies the types */
+#  define PRINTF_INT64_MODIFIER "ll"
+# endif
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER "l" /* NOTE(review): mismatches int32_t where that type is int and long is wider (e.g. LP64) -- confirm */
+# endif
+# ifndef PRINTF_INT16_MODIFIER
+#  define PRINTF_INT16_MODIFIER "h"
+# endif
+# ifndef PRINTF_INTMAX_MODIFIER
+#  define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER
+# endif
+# ifndef PRINTF_INT64_HEX_WIDTH /* field widths are strings so they can be concatenated into format literals */
+#  define PRINTF_INT64_HEX_WIDTH "16"
+# endif
+# ifndef PRINTF_INT32_HEX_WIDTH
+#  define PRINTF_INT32_HEX_WIDTH "8"
+# endif
+# ifndef PRINTF_INT16_HEX_WIDTH
+#  define PRINTF_INT16_HEX_WIDTH "4"
+# endif
+# ifndef PRINTF_INT8_HEX_WIDTH
+#  define PRINTF_INT8_HEX_WIDTH "2"
+# endif
+# ifndef PRINTF_INT64_DEC_WIDTH
+#  define PRINTF_INT64_DEC_WIDTH "20"
+# endif
+# ifndef PRINTF_INT32_DEC_WIDTH
+#  define PRINTF_INT32_DEC_WIDTH "10"
+# endif
+# ifndef PRINTF_INT16_DEC_WIDTH
+#  define PRINTF_INT16_DEC_WIDTH "5"
+# endif
+# ifndef PRINTF_INT8_DEC_WIDTH
+#  define PRINTF_INT8_DEC_WIDTH "3"
+# endif
+# ifndef PRINTF_INTMAX_HEX_WIDTH /* intmax_t is treated as the 64 bit type in this branch */
+#  define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH
+# endif
+# ifndef PRINTF_INTMAX_DEC_WIDTH
+#  define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH
+# endif
+
+/*
+ *  Something really weird is going on with Open Watcom.  Just pull some of
+ *  these duplicated definitions from Open Watcom's stdint.h file for now.
+ */
+
+# if defined (__WATCOMC__) && __WATCOMC__ >= 1250
+#  if !defined (INT64_C)
+#   define INT64_C(x)   (x + (INT64_MAX - INT64_MAX))
+#  endif
+#  if !defined (UINT64_C)
+#   define UINT64_C(x)  (x + (UINT64_MAX - UINT64_MAX))
+#  endif
+#  if !defined (INT32_C)
+#   define INT32_C(x)   (x + (INT32_MAX - INT32_MAX))
+#  endif
+#  if !defined (UINT32_C)
+#   define UINT32_C(x)  (x + (UINT32_MAX - UINT32_MAX))
+#  endif
+#  if !defined (INT16_C)
+#   define INT16_C(x)   (x)
+#  endif
+#  if !defined (UINT16_C)
+#   define UINT16_C(x)  (x)
+#  endif
+#  if !defined (INT8_C)
+#   define INT8_C(x)   (x)
+#  endif
+#  if !defined (UINT8_C)
+#   define UINT8_C(x)  (x)
+#  endif
+#  if !defined (UINT64_MAX)
+#   define UINT64_MAX  18446744073709551615ULL
+#  endif
+#  if !defined (INT64_MAX)
+#   define INT64_MAX  9223372036854775807LL
+#  endif
+#  if !defined (UINT32_MAX)
+#   define UINT32_MAX  4294967295UL
+#  endif
+#  if !defined (INT32_MAX)
+#   define INT32_MAX  2147483647L
+#  endif
+#  if !defined (INTMAX_MAX)
+#   define INTMAX_MAX INT64_MAX
+#  endif
+#  if !defined (INTMAX_MIN)
+#   define INTMAX_MIN INT64_MIN
+#  endif
+# endif
+#endif
+
+#ifndef _PSTDINT_H_INCLUDED
+#define _PSTDINT_H_INCLUDED
+
+#ifndef SIZE_MAX
+# define SIZE_MAX (~(size_t)0)
+#endif
+
+/*
+ *  Deduce the type assignments from limits.h under the assumption that
+ *  integer sizes in bits are powers of 2, and follow the ANSI
+ *  definitions.
+ */
+
+#ifndef UINT8_MAX
+# define UINT8_MAX 0xff
+#endif
+#ifndef uint8_t
+# if (UCHAR_MAX == UINT8_MAX) || defined (S_SPLINT_S) /* unsigned char is exactly 8 bits wide */
+    typedef unsigned char uint8_t;
+#   define UINT8_C(v) ((uint8_t) (v)) /* argument parenthesised, as in UINT16_C, so the cast covers all of it */
+# else
+#   error "Platform not supported"
+# endif
+#endif /* uint8_t */
+
+#ifndef INT8_MAX
+# define INT8_MAX 0x7f
+#endif
+#ifndef INT8_MIN /* derived from INT8_MAX: no cast, so the value can also be tested in #if */
+# define INT8_MIN (-INT8_MAX - 1)
+#endif
+#ifndef int8_t
+# if (SCHAR_MAX == INT8_MAX) || defined (S_SPLINT_S) /* signed char is exactly 8 bits wide */
+    typedef signed char int8_t;
+#   define INT8_C(v) ((int8_t) (v)) /* argument parenthesised, as in INT16_C */
+# else
+#   error "Platform not supported"
+# endif
+#endif /* int8_t */
+
+#ifndef UINT16_MAX
+# define UINT16_MAX 0xffff
+#endif
+#ifndef uint16_t
+#if (UINT_MAX == UINT16_MAX) || defined (S_SPLINT_S) /* unsigned int is exactly 16 bits wide */
+  typedef unsigned int uint16_t;
+# ifndef PRINTF_INT16_MODIFIER
+#  define PRINTF_INT16_MODIFIER "" /* plain int needs no printf length modifier */
+# endif
+# define UINT16_C(v) ((uint16_t) (v))
+#elif (USHRT_MAX == UINT16_MAX) /* otherwise use unsigned short if it is 16 bits wide */
+  typedef unsigned short uint16_t;
+# define UINT16_C(v) ((uint16_t) (v))
+# ifndef PRINTF_INT16_MODIFIER
+#  define PRINTF_INT16_MODIFIER "h"
+# endif
+#else /* neither unsigned int nor unsigned short is 16 bits */
+#error "Platform not supported"
+#endif
+#endif /* uint16_t */
+
+#ifndef INT16_MAX
+# define INT16_MAX 0x7fff
+#endif
+#ifndef INT16_MIN /* derived from INT16_MAX: no cast, so the value can also be tested in #if */
+# define INT16_MIN (-INT16_MAX - 1)
+#endif
+#ifndef int16_t
+#if (INT_MAX == INT16_MAX) || defined (S_SPLINT_S) /* int is exactly 16 bits wide */
+  typedef signed int int16_t;
+# define INT16_C(v) ((int16_t) (v))
+# ifndef PRINTF_INT16_MODIFIER
+#  define PRINTF_INT16_MODIFIER ""
+# endif
+#elif (SHRT_MAX == INT16_MAX) /* otherwise use short if it is 16 bits wide */
+  typedef signed short int16_t;
+# define INT16_C(v) ((int16_t) (v))
+# ifndef PRINTF_INT16_MODIFIER
+#  define PRINTF_INT16_MODIFIER "h"
+# endif
+#else /* neither int nor short is 16 bits */
+#error "Platform not supported"
+#endif
+#endif /* int16_t */
+
+#ifndef UINT32_MAX
+# define UINT32_MAX (0xffffffffUL)
+#endif
+#ifndef uint32_t
+#if (ULONG_MAX == UINT32_MAX) || defined (S_SPLINT_S) /* unsigned long is exactly 32 bits wide */
+  typedef unsigned long uint32_t;
+# define UINT32_C(v) v ## UL
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER "l"
+# endif
+#elif (UINT_MAX == UINT32_MAX) /* otherwise use unsigned int if it is 32 bits wide */
+  typedef unsigned int uint32_t;
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER ""
+# endif
+# define UINT32_C(v) v ## U
+#elif (USHRT_MAX == UINT32_MAX) /* last resort: a 32 bit unsigned short */
+  typedef unsigned short uint32_t;
+# define UINT32_C(v) ((unsigned short) (v))
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER ""
+# endif
+#else /* no 32 bit unsigned type found */
+#error "Platform not supported"
+#endif
+#endif /* uint32_t */
+
+#ifndef INT32_MAX
+# define INT32_MAX (0x7fffffffL)
+#endif
+#ifndef INT32_MIN /* 0x80000000 is an unsigned, positive constant at 32 bits, so derive the minimum from INT32_MAX */
+# define INT32_MIN (-INT32_MAX - 1)
+#endif
+#ifndef int32_t
+#if (LONG_MAX == INT32_MAX) || defined (S_SPLINT_S) /* long is exactly 32 bits wide */
+  typedef signed long int32_t;
+# define INT32_C(v) v ## L
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER "l"
+# endif
+#elif (INT_MAX == INT32_MAX) /* otherwise use int if it is 32 bits wide */
+  typedef signed int int32_t;
+# define INT32_C(v) v
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER ""
+# endif
+#elif (SHRT_MAX == INT32_MAX) /* last resort: a 32 bit short */
+  typedef signed short int32_t;
+# define INT32_C(v) ((short) (v))
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER ""
+# endif
+#else /* no 32 bit signed type found */
+#error "Platform not supported"
+#endif
+#endif /* int32_t */
+
+/*
+ *  The macro stdint_int64_defined is temporarily used to record
+ *  whether or not 64 bit integer support is available.  It must be
+ *  defined for any 64 bit integer extensions for new platforms that are
+ *  added.
+ */
+
+#undef stdint_int64_defined
+#if (defined(__STDC__) && defined(__STDC_VERSION__)) || defined (S_SPLINT_S)
+# if (__STDC__ && __STDC_VERSION__ >= 199901L) || defined (S_SPLINT_S) /* C99 or later: long long is a standard type */
+#  define stdint_int64_defined
+   typedef long long int64_t;
+   typedef unsigned long long uint64_t;
+#  define UINT64_C(v) v ## ULL
+#  define  INT64_C(v) v ## LL
+#  ifndef PRINTF_INT64_MODIFIER
+#   define PRINTF_INT64_MODIFIER "ll"
+#  endif
+# endif /* C99 check */
+#endif /* __STDC_VERSION__ is available */
+
+#if !defined (stdint_int64_defined)
+# if defined(__GNUC__) /* GNU C: long long, declared under __extension__ */
+#  define stdint_int64_defined
+   __extension__ typedef long long int64_t;
+   __extension__ typedef unsigned long long uint64_t;
+#  define UINT64_C(v) v ## ULL
+#  define  INT64_C(v) v ## LL
+#  ifndef PRINTF_INT64_MODIFIER
+#   define PRINTF_INT64_MODIFIER "ll"
+#  endif
+# elif defined(__MWERKS__) || defined (__SUNPRO_C) || defined (__SUNPRO_CC) || defined (__APPLE_CC__) || defined (_LONG_LONG) || defined (_CRAYC) || defined (S_SPLINT_S)
+#  define stdint_int64_defined
+   typedef long long int64_t;
+   typedef unsigned long long uint64_t;
+#  define UINT64_C(v) v ## ULL
+#  define  INT64_C(v) v ## LL
+#  ifndef PRINTF_INT64_MODIFIER
+#   define PRINTF_INT64_MODIFIER "ll"
+#  endif
+# elif (defined(__WATCOMC__) && defined(__WATCOM_INT64__)) || (defined(_MSC_VER) && _INTEGRAL_MAX_BITS >= 64) || (defined (__BORLANDC__) && __BORLANDC__ > 0x460) || defined (__alpha) || defined (__DECC)
+#  define stdint_int64_defined
+   typedef __int64 int64_t; /* these compilers spell the 64 bit type __int64 */
+   typedef unsigned __int64 uint64_t;
+#  define UINT64_C(v) v ## UI64
+#  define  INT64_C(v) v ## I64
+#  ifndef PRINTF_INT64_MODIFIER
+#   define PRINTF_INT64_MODIFIER "I64"
+#  endif
+# endif /* compiler selection; no match leaves stdint_int64_defined undefined */
+#endif /* !stdint_int64_defined */
+
+#if !defined (LONG_LONG_MAX) && defined (INT64_C)
+# define LONG_LONG_MAX INT64_C (9223372036854775807)
+#endif
+#if !defined (ULONG_LONG_MAX) && defined (UINT64_C)
+# define ULONG_LONG_MAX UINT64_C (18446744073709551615)
+#endif
+/* The 64 bit limits are only defined when a 64 bit constant macro exists. */
+#if !defined (INT64_MAX) && defined (INT64_C)
+# define INT64_MAX INT64_C (9223372036854775807)
+#endif
+#if !defined (INT64_MIN) && defined (INT64_C) /* 9223372036854775808 does not fit a signed 64 bit literal, so derive it */
+# define INT64_MIN (-INT64_MAX - 1)
+#endif
+#if !defined (UINT64_MAX) && defined (INT64_C)
+# define UINT64_MAX UINT64_C (18446744073709551615)
+#endif
+
+/*
+ *  Width of hexadecimal for number field.
+ */
+
+#ifndef PRINTF_INT64_HEX_WIDTH
+# define PRINTF_INT64_HEX_WIDTH "16"
+#endif
+#ifndef PRINTF_INT32_HEX_WIDTH
+# define PRINTF_INT32_HEX_WIDTH "8"
+#endif
+#ifndef PRINTF_INT16_HEX_WIDTH
+# define PRINTF_INT16_HEX_WIDTH "4"
+#endif
+#ifndef PRINTF_INT8_HEX_WIDTH
+# define PRINTF_INT8_HEX_WIDTH "2"
+#endif
+
+#ifndef PRINTF_INT64_DEC_WIDTH
+# define PRINTF_INT64_DEC_WIDTH "20"
+#endif
+#ifndef PRINTF_INT32_DEC_WIDTH
+# define PRINTF_INT32_DEC_WIDTH "10"
+#endif
+#ifndef PRINTF_INT16_DEC_WIDTH
+# define PRINTF_INT16_DEC_WIDTH "5"
+#endif
+#ifndef PRINTF_INT8_DEC_WIDTH
+# define PRINTF_INT8_DEC_WIDTH "3"
+#endif
+
+/*
+ *  Ok, let's not worry about 128 bit integers for now.  Moore's law says
+ *  we don't need to worry about that until about 2040 at which point
+ *  we'll have bigger things to worry about.
+ */
+
+#ifdef stdint_int64_defined /* a 64 bit type exists: it is the widest integer type */
+  typedef int64_t intmax_t;
+  typedef uint64_t uintmax_t;
+# define  INTMAX_MAX   INT64_MAX
+# define  INTMAX_MIN   INT64_MIN
+# define UINTMAX_MAX  UINT64_MAX
+# define UINTMAX_C(v) UINT64_C(v)
+# define  INTMAX_C(v)  INT64_C(v)
+# ifndef PRINTF_INTMAX_MODIFIER
+#   define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER
+# endif
+# ifndef PRINTF_INTMAX_HEX_WIDTH
+#  define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH
+# endif
+# ifndef PRINTF_INTMAX_DEC_WIDTH
+#  define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH
+# endif
+#else /* no 64 bit type: fall back to the 32 bit types */
+  typedef int32_t intmax_t;
+  typedef uint32_t uintmax_t;
+# define  INTMAX_MAX   INT32_MAX
+# define  INTMAX_MIN   INT32_MIN
+# define UINTMAX_MAX  UINT32_MAX
+# define UINTMAX_C(v) UINT32_C(v)
+# define  INTMAX_C(v)  INT32_C(v)
+# ifndef PRINTF_INTMAX_MODIFIER
+#   define PRINTF_INTMAX_MODIFIER PRINTF_INT32_MODIFIER
+# endif
+# ifndef PRINTF_INTMAX_HEX_WIDTH
+#  define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT32_HEX_WIDTH
+# endif
+# ifndef PRINTF_INTMAX_DEC_WIDTH
+#  define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT32_DEC_WIDTH
+# endif
+#endif /* stdint_int64_defined */
+/*
+ *  Because this file currently only supports platforms which have
+ *  precise powers of 2 as bit sizes for the default integers, the
+ *  least definitions are all trivial.  It's possible that a future
+ *  version of this file could have different definitions.
+ */
+
+#ifndef stdint_least_defined
+  typedef   int8_t   int_least8_t;
+  typedef  uint8_t  uint_least8_t;
+  typedef  int16_t  int_least16_t;
+  typedef uint16_t uint_least16_t;
+  typedef  int32_t  int_least32_t;
+  typedef uint32_t uint_least32_t;
+# define PRINTF_LEAST32_MODIFIER PRINTF_INT32_MODIFIER
+# define PRINTF_LEAST16_MODIFIER PRINTF_INT16_MODIFIER
+# define  UINT_LEAST8_MAX  UINT8_MAX
+# define   INT_LEAST8_MAX   INT8_MAX
+# define UINT_LEAST16_MAX UINT16_MAX
+# define  INT_LEAST16_MAX  INT16_MAX
+# define UINT_LEAST32_MAX UINT32_MAX
+# define  INT_LEAST32_MAX  INT32_MAX
+# define   INT_LEAST8_MIN   INT8_MIN
+# define  INT_LEAST16_MIN  INT16_MIN
+# define  INT_LEAST32_MIN  INT32_MIN
+# ifdef stdint_int64_defined
+    typedef  int64_t  int_least64_t;
+    typedef uint64_t uint_least64_t;
+#   define PRINTF_LEAST64_MODIFIER PRINTF_INT64_MODIFIER
+#   define UINT_LEAST64_MAX UINT64_MAX
+#   define  INT_LEAST64_MAX  INT64_MAX
+#   define  INT_LEAST64_MIN  INT64_MIN
+# endif
+#endif
+#undef stdint_least_defined
+
+/*
+ *  The ANSI C committee pretending to know or specify anything about
+ *  performance is the epitome of misguided arrogance.  The mandate of
+ *  this file is to *ONLY* ever support that absolute minimum
+ *  definition of the fast integer types, for compatibility purposes.
+ *  No extensions, and no attempt to suggest what may or may not be a
+ *  faster integer type will ever be made in this file.  Developers are
+ *  warned to stay away from these types when using this or any other
+ *  stdint.h.
+ */
+
+typedef   int_least8_t   int_fast8_t;
+typedef  uint_least8_t  uint_fast8_t;
+typedef  int_least16_t  int_fast16_t;
+typedef uint_least16_t uint_fast16_t;
+typedef  int_least32_t  int_fast32_t;
+typedef uint_least32_t uint_fast32_t;
+#define  UINT_FAST8_MAX  UINT_LEAST8_MAX
+#define   INT_FAST8_MAX   INT_LEAST8_MAX
+#define UINT_FAST16_MAX UINT_LEAST16_MAX
+#define  INT_FAST16_MAX  INT_LEAST16_MAX
+#define UINT_FAST32_MAX UINT_LEAST32_MAX
+#define  INT_FAST32_MAX  INT_LEAST32_MAX
+#define   INT_FAST8_MIN   INT_LEAST8_MIN
+#define  INT_FAST16_MIN  INT_LEAST16_MIN
+#define  INT_FAST32_MIN  INT_LEAST32_MIN
+#ifdef stdint_int64_defined
+  typedef  int_least64_t  int_fast64_t;
+  typedef uint_least64_t uint_fast64_t;
+# define UINT_FAST64_MAX UINT_LEAST64_MAX
+# define  INT_FAST64_MAX  INT_LEAST64_MAX
+# define  INT_FAST64_MIN  INT_LEAST64_MIN
+#endif
+
+#undef stdint_int64_defined
+
+/*
+ *  Whatever piecemeal, per compiler thing we can do about the wchar_t
+ *  type limits.
+ */
+
+#if defined(__WATCOMC__) || defined(_MSC_VER) || defined (__GNUC__)
+# include <wchar.h>
+# ifndef WCHAR_MIN /* fallbacks apply only if wchar.h did not define the limits */
+#  define WCHAR_MIN 0
+# endif
+# ifndef WCHAR_MAX
+#  define WCHAR_MAX ((wchar_t)-1) /* all bits set: the maximum only when wchar_t is unsigned */
+# endif
+#endif /* other compilers get no WCHAR_MIN / WCHAR_MAX from this file */
+
+/*
+ *  Whatever piecemeal, per compiler/platform thing we can do about the
+ *  (u)intptr_t types and limits.
+ */
+
+#if defined (_MSC_VER) && defined (_UINTPTR_T_DEFINED)
+# define STDINT_H_UINTPTR_T_DEFINED
+#endif
+/* Otherwise pick a pointer width from compiler/target macros and derive the (u)intptr names from it. */
+#ifndef STDINT_H_UINTPTR_T_DEFINED
+# if defined (__alpha__) || defined (__ia64__) || defined (__x86_64__) || defined (_WIN64)
+#  define stdint_intptr_bits 64
+# elif defined (__WATCOMC__) || defined (__TURBOC__)
+#  if defined(__TINY__) || defined(__SMALL__) || defined(__MEDIUM__)
+#    define stdint_intptr_bits 16
+#  else
+#    define stdint_intptr_bits 32
+#  endif
+# elif defined (__i386__) || defined (_WIN32) || defined (WIN32)
+#  define stdint_intptr_bits 32
+# elif defined (__INTEL_COMPILER)
+/* TODO -- what will Intel do about x86-64? */
+# endif
+/* With a known width, each name below is pasted together from a prefix, the width and a suffix. */
+# ifdef stdint_intptr_bits
+#  define stdint_intptr_glue3_i(a,b,c)  a##b##c
+#  define stdint_intptr_glue3(a,b,c)    stdint_intptr_glue3_i(a,b,c)
+#  ifndef PRINTF_INTPTR_MODIFIER
+#    define PRINTF_INTPTR_MODIFIER      stdint_intptr_glue3(PRINTF_INT,stdint_intptr_bits,_MODIFIER)
+#  endif
+#  ifndef PTRDIFF_MAX
+#    define PTRDIFF_MAX                 stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX)
+#  endif
+#  ifndef PTRDIFF_MIN
+#    define PTRDIFF_MIN                 stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN)
+#  endif
+#  ifndef UINTPTR_MAX
+#    define UINTPTR_MAX                 stdint_intptr_glue3(UINT,stdint_intptr_bits,_MAX)
+#  endif
+#  ifndef INTPTR_MAX
+#    define INTPTR_MAX                  stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX)
+#  endif
+#  ifndef INTPTR_MIN
+#    define INTPTR_MIN                  stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN)
+#  endif
+#  ifndef INTPTR_C
+#    define INTPTR_C(x)                 stdint_intptr_glue3(INT,stdint_intptr_bits,_C)(x)
+#  endif
+#  ifndef UINTPTR_C
+#    define UINTPTR_C(x)                stdint_intptr_glue3(UINT,stdint_intptr_bits,_C)(x)
+#  endif
+  typedef stdint_intptr_glue3(uint,stdint_intptr_bits,_t) uintptr_t;
+  typedef stdint_intptr_glue3( int,stdint_intptr_bits,_t)  intptr_t;
+# else /* width unknown: only intptr_t is provided, and none of the limit macros */
+/* TODO -- This following is likely wrong for some platforms, and does
+   nothing for the definition of uintptr_t. */
+  typedef ptrdiff_t intptr_t;
+# endif
+# define STDINT_H_UINTPTR_T_DEFINED
+#endif /* STDINT_H_UINTPTR_T_DEFINED */
+
+/*
+ *  Assumes sig_atomic_t is signed and we have a 2s complement machine.
+ *  Built as (2^(N-2) - 1) * 2 + 1 for an N bit type, so no step shifts into the sign bit or overflows.
+ */
+#ifndef SIG_ATOMIC_MAX
+# define SIG_ATOMIC_MAX ((((((sig_atomic_t) 1) << (sizeof (sig_atomic_t)*CHAR_BIT-2)) - 1) * 2) + 1)
+#endif
+
+#endif
+
+#if defined (__TEST_PSTDINT_FOR_CORRECTNESS)
+
+/* 
+ *  Please compile with the maximum warning settings to make sure macros are not
+ *  defined more than once.
+ */
+ 
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+ 
+#define glue3_aux(x,y,z) x ## y ## z
+#define glue3(x,y,z) glue3_aux(x,y,z)
+/* Operators stay outside glue3: pasting an identifier such as u8 onto = or ~ does not form a valid token. */
+#define DECLU(bits) glue3(uint,bits,_t) glue3(u,bits,) = glue3(UINT,bits,_C) (0);
+#define DECLI(bits) glue3(int,bits,_t) glue3(i,bits,) = glue3(INT,bits,_C) (0);
+/* DECL(U,8) expands to DECLU (8), which declares a zero-initialised uint8_t named u8. */
+#define DECL(us,bits) glue3(DECL,us,) (bits)
+/* TESTUMAX(8) sets u8 to ~u8 and prints a message if the result differs from UINT8_MAX. */
+#define TESTUMAX(bits) glue3(u,bits,) = ~glue3(u,bits,); if (glue3(UINT,bits,_MAX) != glue3(u,bits,)) printf ("Something wrong with UINT%d_MAX\n", bits)
+ 
+int main () {
+  DECL(I,8)
+  DECL(U,8)
+  DECL(I,16)
+  DECL(U,16)
+  DECL(I,32)
+  DECL(U,32)
+#ifdef INT64_MAX
+  DECL(I,64)
+  DECL(U,64)
+#endif
+  intmax_t imax = INTMAX_C(0);
+  uintmax_t umax = UINTMAX_C(0);
+  char str0[256], str1[256];
+  /* Every variable above is zero, so each line formatted below must equal the reference in str0. */
+  sprintf (str0, "%d %x\n", 0, ~0);
+  sprintf (str1, "%d %x\n",  i8, ~0);
+  if (0 != strcmp (str0, str1)) printf ("Something wrong with i8 : %s\n", str1);
+  sprintf (str1, "%u %x\n",  u8, ~0);
+  if (0 != strcmp (str0, str1)) printf ("Something wrong with u8 : %s\n", str1);
+  sprintf (str1, "%d %x\n",  i16, ~0);
+  if (0 != strcmp (str0, str1)) printf ("Something wrong with i16 : %s\n", str1);
+  sprintf (str1, "%u %x\n",  u16, ~0);
+  if (0 != strcmp (str0, str1)) printf ("Something wrong with u16 : %s\n", str1);
+  sprintf (str1, "%" PRINTF_INT32_MODIFIER "d %x\n",  i32, ~0);
+  if (0 != strcmp (str0, str1)) printf ("Something wrong with i32 : %s\n", str1);
+  sprintf (str1, "%" PRINTF_INT32_MODIFIER "u %x\n",  u32, ~0);
+  if (0 != strcmp (str0, str1)) printf ("Something wrong with u32 : %s\n", str1);
+#ifdef INT64_MAX
+  sprintf (str1, "%" PRINTF_INT64_MODIFIER "d %x\n",  i64, ~0);
+  if (0 != strcmp (str0, str1)) printf ("Something wrong with i64 : %s\n", str1);
+  sprintf (str1, "%" PRINTF_INT64_MODIFIER "u %x\n",  u64, ~0);
+  if (0 != strcmp (str0, str1)) printf ("Something wrong with u64 : %s\n", str1);
+#endif
+  sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "d %x\n",  imax, ~0);
+  if (0 != strcmp (str0, str1)) printf ("Something wrong with imax : %s\n", str1);
+  sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "u %x\n",  umax, ~0);
+  if (0 != strcmp (str0, str1)) printf ("Something wrong with umax : %s\n", str1);
+  /* Then compare each UINTn_MAX with the complement of the corresponding zeroed variable. */
+  TESTUMAX(8);
+  TESTUMAX(16);
+  TESTUMAX(32);
+#ifdef INT64_MAX
+  TESTUMAX(64);
+#endif
+  return EXIT_SUCCESS;
+}
+
+#endif
diff --git a/sha1.cpp b/sha1.cpp
new file mode 100644
index 0000000..0e23c31
--- /dev/null
+++ b/sha1.cpp
@@ -0,0 +1,325 @@
+/*
+SHA-1 in C
+By Steve Reid <sreid@sea-to-sky.net>
+100% Public Domain
+
+-----------------
+Modified 7/98
+By James H. Brown <jbrown@burgoyne.com>
+Still 100% Public Domain
+
+Corrected a problem which generated improper hash values on 16 bit machines
+Routine SHA1Update changed from
+  void SHA1Update(SHA1_CTX* context, unsigned char* data,
+                  unsigned int len)
+to
+  void SHA1Update(SHA1_CTX* context, unsigned char* data,
+                  unsigned long len)
+
+The 'len' parameter was declared an int which works fine on 32 bit machines.
+However, on 16 bit machines an int is too small for the shifts being done
+against it.
+This caused the hash function to generate incorrect values if len was
+greater than 8191 (8K - 1) due to the 'len << 3' on line 3 of SHA1Update().
+
+Since the file IO in main() reads 16K at a time, any file 8K or larger would
+be guaranteed to generate the wrong hash (e.g. Test Vector #3, a million
+"a"s).
+
+I also changed the declaration of variables i & j in SHA1Update to
+unsigned long from unsigned int for the same reason.
+
+These changes should make no difference to any 32 bit
+implementations since an int and a long are the same size
+in those environments.
+
+--
+I also corrected a few compiler warnings generated by Borland C.
+1. Added #include <process.h> for exit() prototype
+2. Removed unused variable 'j' in SHA1Final
+3. Changed exit(0) to return(0) at end of main.
+
+ALL changes I made can be located by searching for comments containing 'JHB'
+-----------------
+Modified 8/98
+By Steve Reid <sreid@sea-to-sky.net>
+Still 100% public domain
+
+1- Removed #include <process.h> and used return() instead of exit()
+2- Fixed overwriting of finalcount in SHA1Final() (discovered by Chris Hall)
+3- Changed email address from steve@edmweb.com to sreid@sea-to-sky.net
+
+-----------------
+Modified 4/01
+By Saul Kravitz <Saul.Kravitz@celera.com>
+Still 100% PD
+Modified to run on Compaq Alpha hardware.
+
+-----------------
+Modified 07/2002
+By Ralph Giles <giles@ghostscript.com>
+Still 100% public domain
+modified for use with stdint types, autoconf
+code cleanup, removed attribution comments
+switched SHA1Final() argument order for consistency
+use SHA1_ prefix for public api
+move public api to sha1.h
+*/
+
+/*
+Test Vectors (from FIPS PUB 180-1)
+"abc"
+  A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D
+"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"
+  84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1
+A million repetitions of "a"
+  34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "sha1.h"
+
+#if defined(_MSC_VER)
+#pragma warning(disable : 4267)
+#pragma warning(disable : 4996)
+#pragma warning(disable : 4100)
+#endif
+
+void SHA1_Transform(uint32_t state[5], const uint8_t buffer[64]);
+
+#define rol ROTL32 /* ROTL32 is defined elsewhere; presumably a 32-bit rotate-left - confirm */
+/* The macros below expect a local 'block' pointer whose l[16] member holds the message words. */
+/* blk0() and blk() perform the initial expand: blk0(i) yields input word i (rewritten in place by the */
+/* rol/mask expression unless WORDS_BIGENDIAN is defined); blk(i) rebuilds slot i&15 from slots i+13, i+8, i+2, i (mod 16). */
+/* I got the idea of expanding during the round function from SSLeay */
+/* FIXME: can we do this in an endian-proof way? */
+#ifdef WORDS_BIGENDIAN
+#define blk0(i) block->l[i]
+#else
+#define blk0(i) (block->l[i] = (rol(block->l[i],24)&0xFF00FF00) | (rol(block->l[i],8)&0x00FF00FF))
+#endif
+#define blk(i) (block->l[i&15] = rol(block->l[(i+13)&15]^block->l[(i+8)&15] ^ block->l[(i+2)&15]^block->l[i&15],1))
+
+/* (R0+R1), R2, R3, R4 are the different operations used in SHA1: each adds a mix of w,x,y, a message word, a constant and rol(v,5) to z, then replaces w with rol(w,30) */
+#define R0(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk0(i)+0x5A827999+rol(v,5);w=rol(w,30);
+#define R1(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk(i)+0x5A827999+rol(v,5);w=rol(w,30);
+#define R2(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0x6ED9EBA1+rol(v,5);w=rol(w,30);
+#define R3(v,w,x,y,z,i) z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=rol(w,30);
+#define R4(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=rol(w,30);
+
+
+/* Hash a single 512-bit block: folds the 64 bytes at 'buffer' into state[0..4]. The input is copied first, so it is only read and may be unaligned. */
+void SHA1_Transform(uint32_t state[5], const uint8_t buffer[64])
+{
+    uint32_t a, b, c, d, e;
+    typedef union {
+        uint8_t c[64];
+        uint32_t l[16];
+    } CHAR64LONG16;
+    CHAR64LONG16 workspace;             /* private, properly aligned copy of the block */
+    CHAR64LONG16* block = &workspace;   /* the name the blk0()/blk() macros refer to */
+    memcpy(workspace.c, buffer, 64);    /* blk0()/blk() store into block->l[], so never alias the caller's const data */
+
+    /* Copy state[] to working vars */
+    a = state[0];
+    b = state[1];
+    c = state[2];
+    d = state[3];
+    e = state[4];
+
+    /* 4 rounds of 20 operations each. Loop unrolled. */
+    R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
+    R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
+    R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11);
+    R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15);
+    R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
+    R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
+    R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
+    R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
+    R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
+    R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
+    R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
+    R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
+    R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
+    R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
+    R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
+    R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
+    R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
+    R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
+    R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
+    R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
+
+    /* Add the working vars back into state[] */
+    state[0] += a;
+    state[1] += b;
+    state[2] += c;
+    state[3] += d;
+    state[4] += e;
+    /* Wipe variables, including the local copy of the message block */
+    a = b = c = d = e = 0;
+    memset(&workspace, 0, sizeof(workspace));
+}
+
+
+/* SHA1_Init - put a context into its starting state: initial constants loaded, length zero */
+void SHA1_Init(SHA1_CTX* context)
+{
+    /* SHA1 initialization constants, in state[0]..state[4] order */
+    static const uint32_t initial_state[5] = {
+        0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0
+    };
+
+    memcpy(context->state, initial_state, sizeof(initial_state));
+    /* Nothing hashed yet: both halves of the bit counter start at zero */
+    context->count[0] = context->count[1] = 0;
+}
+
+
+/* Run your data through this: absorbs 'len' bytes from 'data' into 'context'. May be called repeatedly before SHA1_Final. */
+void SHA1_Update(SHA1_CTX* context, const uint8_t* data, const size_t len)
+{
+    size_t i, j;
+    /* Split the added bit count (len * 8) into 32-bit halves up front: comparing the 32-bit */
+    /* counter against a wider size_t value reported a false carry whenever len >= 2^29. */
+    const uint32_t low_bits = (uint32_t)(len << 3);
+    const uint32_t high_bits = (uint32_t)(len >> 29);
+
+    j = (context->count[0] >> 3) & 63;   /* bytes already waiting in buffer[] */
+    if ((context->count[0] += low_bits) < low_bits) context->count[1]++;   /* carry out of the low word */
+    context->count[1] += high_bits;
+
+    if ((j + len) > 63)
+    {
+        /* Top up the partially filled buffer and hash it */
+        memcpy(&context->buffer[j], data, (i = 64-j));
+        SHA1_Transform(context->state, context->buffer);
+        /* Hash the remaining whole blocks straight from the input */
+        for ( ; i + 63 < len; i += 64) SHA1_Transform(context->state, data + i);
+        j = 0;
+    }
+    else i = 0;
+    memcpy(&context->buffer[j], &data[i], len - i);   /* keep the tail for the next call */
+}
+
+
+/* Add padding and return the message digest; the context is wiped before returning. */
+void SHA1_Final(SHA1_CTX* context, uint8_t digest[SHA1_DIGEST_SIZE])
+{
+    uint8_t  length_be[8];   /* message length in bits, most significant byte first */
+    uint32_t n;
+    /* Snapshot the length now: the padding below goes through SHA1_Update() and advances count[] */
+    for (n = 0; n < 4; n++) {
+        const uint32_t shift = (3 - n) * 8;   /* Endian independent */
+        length_be[n]     = (unsigned char)((context->count[1] >> shift) & 255);
+        length_be[n + 4] = (unsigned char)((context->count[0] >> shift) & 255);
+    }
+
+    /* One 0200 (0x80) byte, then zero bytes until the buffer holds 56 of its 64 bytes */
+    SHA1_Update(context, (uint8_t *)"\200", 1);
+    while ((context->count[0] & 504) != 448) SHA1_Update(context, (uint8_t *)"\0", 1);
+    SHA1_Update(context, length_be, 8);  /* Should cause a SHA1_Transform() */
+
+    /* Emit state[] as bytes, each 32-bit word most significant byte first */
+    for (n = 0; n < SHA1_DIGEST_SIZE; n++)
+        digest[n] = (uint8_t)((context->state[n >> 2] >> ((3 - (n & 3)) * 8)) & 255);
+
+    /* Wipe variables */
+    memset(context->buffer, 0, sizeof(context->buffer));
+    memset(context->state, 0, sizeof(context->state));
+    memset(context->count, 0, sizeof(context->count));
+    memset(length_be, 0, sizeof(length_be));
+}
+
+//-----------------------------------------------------------------------------
+
+void sha1_32a ( const void * key, int len, uint32_t seed, void * out )
+{
+  // Hash 'len' bytes at 'key' with SHA-1 and hand back only the first 4 digest bytes in 'out'.
+  // Note: 'seed' is not used anywhere in this function.
+  uint8_t full_digest[SHA1_DIGEST_SIZE];
+  SHA1_CTX sha;
+
+  SHA1_Init(&sha);
+  SHA1_Update(&sha, (const uint8_t*)key, len);
+  SHA1_Final(&sha, full_digest);
+  memcpy(out, full_digest, 4);
+}
+
+//-----------------------------------------------------------------------------
+// self test
+
+//#define TEST
+
+#ifdef TEST
+
+static const char *test_data[] = {   /* inputs; entry 2 only describes its input, which main() generates itself */
+    "abc",
+    "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
+    "A million repetitions of 'a'"};
+static const char *test_results[] = {   /* expected digests, in the layout digest_to_hex() produces */
+    "A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D",
+    "84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1",
+    "34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F"};
+
+
+void digest_to_hex(const uint8_t digest[SHA1_DIGEST_SIZE], char *output)
+{
+    /* Writes the digest as uppercase hex, a single space after every 4 bytes. */
+    char *cursor = output;
+    int n;
+
+    for (n = 0; n < SHA1_DIGEST_SIZE; n++) {
+        cursor += sprintf(cursor, "%02X", digest[n]);
+        if ((n & 3) == 3) {
+            /* end of a 4-byte group */
+            cursor += sprintf(cursor, " ");
+        }
+    }
+    cursor[-1] = '\0';   /* the space after the last group becomes the terminator */
+}
+
+int main(int argc, char** argv)
+{
+    /* Self-test driver: hashes the three test vectors and compares the hex form of each */
+    /* digest with test_results[]. Returns 0 when all match, 1 at the first mismatch. */
+    SHA1_CTX sha;
+    uint8_t raw[20];
+    char hex[80];
+    int vec, rep;
+
+    fprintf(stdout, "verifying SHA-1 implementation... ");
+
+    /* Vectors 0 and 1 are hashed as written; test_data[2] is only a label, */
+    /* so that input (a million 'a' bytes) is fed one byte at a time. */
+    for (vec = 0; vec < 3; vec++) {
+        SHA1_Init(&sha);
+        if (vec < 2) {
+            SHA1_Update(&sha, (uint8_t*)test_data[vec], strlen(test_data[vec]));
+        } else {
+            for (rep = 0; rep < 1000000; rep++) {
+                SHA1_Update(&sha, (uint8_t*)"a", 1);
+            }
+        }
+        SHA1_Final(&sha, raw);
+        digest_to_hex(raw, hex);
+
+        if (strcmp(hex, test_results[vec]) == 0) {
+            continue;   /* digest matches */
+        }
+
+        /* Mismatch: status on stdout, details on stderr, non-zero exit */
+        fprintf(stdout, "FAIL\n");
+        fprintf(stderr,"* hash of \"%s\" incorrect:\n", test_data[vec]);
+        fprintf(stderr,"\t%s returned\n", hex);
+        fprintf(stderr,"\t%s is correct\n", test_results[vec]);
+        return (1);
+    }
+
+    /* success */
+    fprintf(stdout, "ok\n");
+    return(0);
+}
+#endif /* TEST */
diff --git a/sha1.h b/sha1.h
new file mode 100644
index 0000000..16b10a1
--- /dev/null
+++ b/sha1.h
@@ -0,0 +1,21 @@
+/* public api for steve reid's public domain SHA-1 implementation */
+/* this file is in the public domain */
+
+#pragma once
+
+#include "Platform.h"
+
+struct SHA1_CTX  /* working state of one SHA-1 computation */
+{
+    uint32_t state[5];   /* running hash value, five 32-bit words */
+    uint32_t count[2];   /* bits hashed so far: [0] low 32 bits, [1] high 32 bits */
+    uint8_t  buffer[64]; /* input bytes waiting until a full 64-byte block is available */
+};
+
+#define SHA1_DIGEST_SIZE 20 /* number of bytes SHA1_Final writes to digest */
+/* Streaming interface: SHA1_Init once, SHA1_Update for each piece of input, then SHA1_Final (which also wipes the context). */
+void SHA1_Init(SHA1_CTX* context);
+void SHA1_Update(SHA1_CTX* context, const uint8_t* data, const size_t len);
+void SHA1_Final(SHA1_CTX* context, uint8_t digest[SHA1_DIGEST_SIZE]);
+/* One-shot helper: writes the first 4 bytes of the SHA-1 digest of key[0..len) to out; seed is not used. */
+void sha1_32a ( const void * key, int len, uint32_t seed, void * out );
\ No newline at end of file