clang-r353983c/include/llvm/Support/OnDiskHashTable.h - platform/prebuilts/clang/host/darwin-x86 - Git at Google

 //===--- OnDiskHashTable.h - On-Disk Hash Table Implementation --*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
 /// \file
 /// Defines facilities for reading and writing on-disk hash tables.
 ///
 //===----------------------------------------------------------------------===//
 #ifndef LLVM_SUPPORT_ONDISKHASHTABLE_H
 #define LLVM_SUPPORT_ONDISKHASHTABLE_H

 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/EndianStream.h"
 #include "llvm/Support/Host.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cassert>
 #include <cstdlib>

 namespace llvm {

 /// Generates an on disk hash table.
 ///
 /// This needs an \c Info that handles storing values into the hash table's
 /// payload and computes the hash for a given key. This should provide the
 /// following interface:
 ///
 /// \code
 /// class ExampleInfo {
 /// public:
 ///   typedef ExampleKey key_type;   // Must be copy constructible
 ///   typedef ExampleKey &key_type_ref;
 ///   typedef ExampleData data_type; // Must be copy constructible
 ///   typedef ExampleData &data_type_ref;
 ///   typedef uint32_t hash_value_type; // The type the hash function returns.
 ///   typedef uint32_t offset_type; // The type for offsets into the table.
 ///
 ///   /// Calculate the hash for Key
 ///   static hash_value_type ComputeHash(key_type_ref Key);
 ///   /// Return the lengths, in bytes, of the given Key/Data pair.
 ///   static std::pair<offset_type, offset_type>
 ///   EmitKeyDataLength(raw_ostream &Out, key_type_ref Key, data_type_ref Data);
 ///   /// Write Key to Out.  KeyLen is the length from EmitKeyDataLength.
 ///   static void EmitKey(raw_ostream &Out, key_type_ref Key,
 ///                       offset_type KeyLen);
 ///   /// Write Data to Out.  DataLen is the length from EmitKeyDataLength.
 ///   static void EmitData(raw_ostream &Out, key_type_ref Key,
 ///                        data_type_ref Data, offset_type DataLen);
 ///   /// Determine if two keys are equal. Optional, only needed by contains.
 ///   static bool EqualKey(key_type_ref Key1, key_type_ref Key2);
 /// };
 /// \endcode
 template <typename Info> class OnDiskChainedHashTableGenerator {
   /// A single item in the hash table.
   class Item {
   public:
     typename Info::key_type Key;
     typename Info::data_type Data;
     Item *Next;
     const typename Info::hash_value_type Hash;

     Item(typename Info::key_type_ref Key, typename Info::data_type_ref Data,
          Info &InfoObj)
         : Key(Key), Data(Data), Next(nullptr), Hash(InfoObj.ComputeHash(Key)) {}
   };

   typedef typename Info::offset_type offset_type;
   offset_type NumBuckets;
   offset_type NumEntries;
   llvm::SpecificBumpPtrAllocator<Item> BA;

   /// A linked list of values in a particular hash bucket.
   struct Bucket {
     offset_type Off;
     unsigned Length;
     Item *Head;
   };

   Bucket *Buckets;

 private:
   /// Insert an item into the appropriate hash bucket.
   void insert(Bucket *Buckets, size_t Size, Item *E) {
     Bucket &B = Buckets[E->Hash & (Size - 1)];
     E->Next = B.Head;
     ++B.Length;
     B.Head = E;
   }

   /// Resize the hash table, moving the old entries into the new buckets.
   void resize(size_t NewSize) {
     Bucket *NewBuckets = static_cast<Bucket *>(
         safe_calloc(NewSize, sizeof(Bucket)));
     // Populate NewBuckets with the old entries.
     for (size_t I = 0; I < NumBuckets; ++I)
       for (Item *E = Buckets[I].Head; E;) {
         Item *N = E->Next;
         E->Next = nullptr;
         insert(NewBuckets, NewSize, E);
         E = N;
       }

     free(Buckets);
     NumBuckets = NewSize;
     Buckets = NewBuckets;
   }

 public:
   /// Insert an entry into the table.
   void insert(typename Info::key_type_ref Key,
               typename Info::data_type_ref Data) {
     Info InfoObj;
     insert(Key, Data, InfoObj);
   }

   /// Insert an entry into the table.
   ///
   /// Uses the provided Info instead of a stack allocated one.
   void insert(typename Info::key_type_ref Key,
               typename Info::data_type_ref Data, Info &InfoObj) {
     ++NumEntries;
     if (4 * NumEntries >= 3 * NumBuckets)
       resize(NumBuckets * 2);
     insert(Buckets, NumBuckets, new (BA.Allocate()) Item(Key, Data, InfoObj));
   }

   /// Determine whether an entry has been inserted.
   bool contains(typename Info::key_type_ref Key, Info &InfoObj) {
     unsigned Hash = InfoObj.ComputeHash(Key);
     for (Item *I = Buckets[Hash & (NumBuckets - 1)].Head; I; I = I->Next)
       if (I->Hash == Hash && InfoObj.EqualKey(I->Key, Key))
         return true;
     return false;
   }

   /// Emit the table to Out, which must not be at offset 0.
   offset_type Emit(raw_ostream &Out) {
     Info InfoObj;
     return Emit(Out, InfoObj);
   }

   /// Emit the table to Out, which must not be at offset 0.
   ///
   /// Uses the provided Info instead of a stack allocated one.
   offset_type Emit(raw_ostream &Out, Info &InfoObj) {
     using namespace llvm::support;
     endian::Writer LE(Out, little);

     // Now we're done adding entries, resize the bucket list if it's
     // significantly too large. (This only happens if the number of
     // entries is small and we're within our initial allocation of
     // 64 buckets.) We aim for an occupancy ratio in [3/8, 3/4).
     //
     // As a special case, if there are two or fewer entries, just
     // form a single bucket. A linear scan is fine in that case, and
     // this is very common in C++ class lookup tables. This also
     // guarantees we produce at least one bucket for an empty table.
     //
     // FIXME: Try computing a perfect hash function at this point.
     unsigned TargetNumBuckets =
         NumEntries <= 2 ? 1 : NextPowerOf2(NumEntries * 4 / 3);
     if (TargetNumBuckets != NumBuckets)
       resize(TargetNumBuckets);

     // Emit the payload of the table.
     for (offset_type I = 0; I < NumBuckets; ++I) {
       Bucket &B = Buckets[I];
       if (!B.Head)
         continue;

       // Store the offset for the data of this bucket.
       B.Off = Out.tell();
       assert(B.Off && "Cannot write a bucket at offset 0. Please add padding.");

       // Write out the number of items in the bucket.
       LE.write<uint16_t>(B.Length);
       assert(B.Length != 0 && "Bucket has a head but zero length?");

       // Write out the entries in the bucket.
       for (Item *I = B.Head; I; I = I->Next) {
         LE.write<typename Info::hash_value_type>(I->Hash);
         const std::pair<offset_type, offset_type> &Len =
             InfoObj.EmitKeyDataLength(Out, I->Key, I->Data);
 #ifdef NDEBUG
         InfoObj.EmitKey(Out, I->Key, Len.first);
         InfoObj.EmitData(Out, I->Key, I->Data, Len.second);
 #else
         // In asserts mode, check that the users length matches the data they
         // wrote.
         uint64_t KeyStart = Out.tell();
         InfoObj.EmitKey(Out, I->Key, Len.first);
         uint64_t DataStart = Out.tell();
         InfoObj.EmitData(Out, I->Key, I->Data, Len.second);
         uint64_t End = Out.tell();
         assert(offset_type(DataStart - KeyStart) == Len.first &&
                "key length does not match bytes written");
         assert(offset_type(End - DataStart) == Len.second &&
                "data length does not match bytes written");
 #endif
       }
     }

     // Pad with zeros so that we can start the hashtable at an aligned address.
     offset_type TableOff = Out.tell();
     uint64_t N = llvm::OffsetToAlignment(TableOff, alignof(offset_type));
     TableOff += N;
     while (N--)
       LE.write<uint8_t>(0);

     // Emit the hashtable itself.
     LE.write<offset_type>(NumBuckets);
     LE.write<offset_type>(NumEntries);
     for (offset_type I = 0; I < NumBuckets; ++I)
       LE.write<offset_type>(Buckets[I].Off);

     return TableOff;
   }

   OnDiskChainedHashTableGenerator() {
     NumEntries = 0;
     NumBuckets = 64;
     // Note that we do not need to run the constructors of the individual
     // Bucket objects since 'calloc' returns bytes that are all 0.
     Buckets = static_cast<Bucket *>(safe_calloc(NumBuckets, sizeof(Bucket)));
   }

   ~OnDiskChainedHashTableGenerator() { std::free(Buckets); }
 };

 /// Provides lookup on an on disk hash table.
 ///
 /// This needs an \c Info that handles reading values from the hash table's
 /// payload and computes the hash for a given key. This should provide the
 /// following interface:
 ///
 /// \code
 /// class ExampleLookupInfo {
 /// public:
 ///   typedef ExampleData data_type;
 ///   typedef ExampleInternalKey internal_key_type; // The stored key type.
 ///   typedef ExampleKey external_key_type; // The type to pass to find().
 ///   typedef uint32_t hash_value_type; // The type the hash function returns.
 ///   typedef uint32_t offset_type; // The type for offsets into the table.
 ///
 ///   /// Compare two keys for equality.
 ///   static bool EqualKey(internal_key_type &Key1, internal_key_type &Key2);
 ///   /// Calculate the hash for the given key.
 ///   static hash_value_type ComputeHash(internal_key_type &IKey);
 ///   /// Translate from the semantic type of a key in the hash table to the
 ///   /// type that is actually stored and used for hashing and comparisons.
 ///   /// The internal and external types are often the same, in which case this
 ///   /// can simply return the passed in value.
 ///   static const internal_key_type &GetInternalKey(external_key_type &EKey);
 ///   /// Read the key and data length from Buffer, leaving it pointing at the
 ///   /// following byte.
 ///   static std::pair<offset_type, offset_type>
 ///   ReadKeyDataLength(const unsigned char *&Buffer);
 ///   /// Read the key from Buffer, given the KeyLen as reported from
 ///   /// ReadKeyDataLength.
 ///   const internal_key_type &ReadKey(const unsigned char *Buffer,
 ///                                    offset_type KeyLen);
 ///   /// Read the data for Key from Buffer, given the DataLen as reported from
 ///   /// ReadKeyDataLength.
 ///   data_type ReadData(StringRef Key, const unsigned char *Buffer,
 ///                      offset_type DataLen);
 /// };
 /// \endcode
 template <typename Info> class OnDiskChainedHashTable {
   const typename Info::offset_type NumBuckets;
   const typename Info::offset_type NumEntries;
   const unsigned char *const Buckets;
   const unsigned char *const Base;
   Info InfoObj;

 public:
   typedef Info InfoType;
   typedef typename Info::internal_key_type internal_key_type;
   typedef typename Info::external_key_type external_key_type;
   typedef typename Info::data_type data_type;
   typedef typename Info::hash_value_type hash_value_type;
   typedef typename Info::offset_type offset_type;

   OnDiskChainedHashTable(offset_type NumBuckets, offset_type NumEntries,
                          const unsigned char *Buckets,
                          const unsigned char *Base,
                          const Info &InfoObj = Info())
       : NumBuckets(NumBuckets), NumEntries(NumEntries), Buckets(Buckets),
         Base(Base), InfoObj(InfoObj) {
     assert((reinterpret_cast<uintptr_t>(Buckets) & 0x3) == 0 &&
            "'buckets' must have a 4-byte alignment");
   }

   /// Read the number of buckets and the number of entries from a hash table
   /// produced by OnDiskHashTableGenerator::Emit, and advance the Buckets
   /// pointer past them.
   static std::pair<offset_type, offset_type>
   readNumBucketsAndEntries(const unsigned char *&Buckets) {
     assert((reinterpret_cast<uintptr_t>(Buckets) & 0x3) == 0 &&
            "buckets should be 4-byte aligned.");
     using namespace llvm::support;
     offset_type NumBuckets =
         endian::readNext<offset_type, little, aligned>(Buckets);
     offset_type NumEntries =
         endian::readNext<offset_type, little, aligned>(Buckets);
     return std::make_pair(NumBuckets, NumEntries);
   }

   offset_type getNumBuckets() const { return NumBuckets; }
   offset_type getNumEntries() const { return NumEntries; }
   const unsigned char *getBase() const { return Base; }
   const unsigned char *getBuckets() const { return Buckets; }

   bool isEmpty() const { return NumEntries == 0; }

   class iterator {
     internal_key_type Key;
     const unsigned char *const Data;
     const offset_type Len;
     Info *InfoObj;

   public:
     iterator() : Key(), Data(nullptr), Len(0), InfoObj(nullptr) {}
     iterator(const internal_key_type K, const unsigned char *D, offset_type L,
              Info *InfoObj)
         : Key(K), Data(D), Len(L), InfoObj(InfoObj) {}

     data_type operator*() const { return InfoObj->ReadData(Key, Data, Len); }

     const unsigned char *getDataPtr() const { return Data; }
     offset_type getDataLen() const { return Len; }

     bool operator==(const iterator &X) const { return X.Data == Data; }
     bool operator!=(const iterator &X) const { return X.Data != Data; }
   };

   /// Look up the stored data for a particular key.
   iterator find(const external_key_type &EKey, Info *InfoPtr = nullptr) {
     const internal_key_type &IKey = InfoObj.GetInternalKey(EKey);
     hash_value_type KeyHash = InfoObj.ComputeHash(IKey);
     return find_hashed(IKey, KeyHash, InfoPtr);
   }

   /// Look up the stored data for a particular key with a known hash.
   iterator find_hashed(const internal_key_type &IKey, hash_value_type KeyHash,
                        Info *InfoPtr = nullptr) {
     using namespace llvm::support;

     if (!InfoPtr)
       InfoPtr = &InfoObj;

     // Each bucket is just an offset into the hash table file.
     offset_type Idx = KeyHash & (NumBuckets - 1);
     const unsigned char *Bucket = Buckets + sizeof(offset_type) * Idx;

     offset_type Offset = endian::readNext<offset_type, little, aligned>(Bucket);
     if (Offset == 0)
       return iterator(); // Empty bucket.
     const unsigned char *Items = Base + Offset;

     // 'Items' starts with a 16-bit unsigned integer representing the
     // number of items in this bucket.
     unsigned Len = endian::readNext<uint16_t, little, unaligned>(Items);

     for (unsigned i = 0; i < Len; ++i) {
       // Read the hash.
       hash_value_type ItemHash =
           endian::readNext<hash_value_type, little, unaligned>(Items);

       // Determine the length of the key and the data.
       const std::pair<offset_type, offset_type> &L =
           Info::ReadKeyDataLength(Items);
       offset_type ItemLen = L.first + L.second;

       // Compare the hashes.  If they are not the same, skip the entry entirely.
       if (ItemHash != KeyHash) {
         Items += ItemLen;
         continue;
       }

       // Read the key.
       const internal_key_type &X =
           InfoPtr->ReadKey((const unsigned char *const)Items, L.first);

       // If the key doesn't match just skip reading the value.
       if (!InfoPtr->EqualKey(X, IKey)) {
         Items += ItemLen;
         continue;
       }

       // The key matches!
       return iterator(X, Items + L.first, L.second, InfoPtr);
     }

     return iterator();
   }

   iterator end() const { return iterator(); }

   Info &getInfoObj() { return InfoObj; }

   /// Create the hash table.
   ///
   /// \param Buckets is the beginning of the hash table itself, which follows
   /// the payload of entire structure. This is the value returned by
   /// OnDiskHashTableGenerator::Emit.
   ///
   /// \param Base is the point from which all offsets into the structure are
   /// based. This is offset 0 in the stream that was used when Emitting the
   /// table.
   static OnDiskChainedHashTable *Create(const unsigned char *Buckets,
                                         const unsigned char *const Base,
                                         const Info &InfoObj = Info()) {
     assert(Buckets > Base);
     auto NumBucketsAndEntries = readNumBucketsAndEntries(Buckets);
     return new OnDiskChainedHashTable<Info>(NumBucketsAndEntries.first,
                                             NumBucketsAndEntries.second,
                                             Buckets, Base, InfoObj);
   }
 };

 /// Provides lookup and iteration over an on disk hash table.
 ///
 /// \copydetails llvm::OnDiskChainedHashTable
 template <typename Info>
 class OnDiskIterableChainedHashTable : public OnDiskChainedHashTable<Info> {
   const unsigned char *Payload;

 public:
   typedef OnDiskChainedHashTable<Info>          base_type;
   typedef typename base_type::internal_key_type internal_key_type;
   typedef typename base_type::external_key_type external_key_type;
   typedef typename base_type::data_type         data_type;
   typedef typename base_type::hash_value_type   hash_value_type;
   typedef typename base_type::offset_type       offset_type;

 private:
   /// Iterates over all of the keys in the table.
   class iterator_base {
     const unsigned char *Ptr;
     offset_type NumItemsInBucketLeft;
     offset_type NumEntriesLeft;

   public:
     typedef external_key_type value_type;

     iterator_base(const unsigned char *const Ptr, offset_type NumEntries)
         : Ptr(Ptr), NumItemsInBucketLeft(0), NumEntriesLeft(NumEntries) {}
     iterator_base()
         : Ptr(nullptr), NumItemsInBucketLeft(0), NumEntriesLeft(0) {}

     friend bool operator==(const iterator_base &X, const iterator_base &Y) {
       return X.NumEntriesLeft == Y.NumEntriesLeft;
     }
     friend bool operator!=(const iterator_base &X, const iterator_base &Y) {
       return X.NumEntriesLeft != Y.NumEntriesLeft;
     }

     /// Move to the next item.
     void advance() {
       using namespace llvm::support;
       if (!NumItemsInBucketLeft) {
         // 'Items' starts with a 16-bit unsigned integer representing the
         // number of items in this bucket.
         NumItemsInBucketLeft =
             endian::readNext<uint16_t, little, unaligned>(Ptr);
       }
       Ptr += sizeof(hash_value_type); // Skip the hash.
       // Determine the length of the key and the data.
       const std::pair<offset_type, offset_type> &L =
           Info::ReadKeyDataLength(Ptr);
       Ptr += L.first + L.second;
       assert(NumItemsInBucketLeft);
       --NumItemsInBucketLeft;
       assert(NumEntriesLeft);
       --NumEntriesLeft;
     }

     /// Get the start of the item as written by the trait (after the hash and
     /// immediately before the key and value length).
     const unsigned char *getItem() const {
       return Ptr + (NumItemsInBucketLeft ? 0 : 2) + sizeof(hash_value_type);
     }
   };

 public:
   OnDiskIterableChainedHashTable(offset_type NumBuckets, offset_type NumEntries,
                                  const unsigned char *Buckets,
                                  const unsigned char *Payload,
                                  const unsigned char *Base,
                                  const Info &InfoObj = Info())
       : base_type(NumBuckets, NumEntries, Buckets, Base, InfoObj),
         Payload(Payload) {}

   /// Iterates over all of the keys in the table.
   class key_iterator : public iterator_base {
     Info *InfoObj;

   public:
     typedef external_key_type value_type;

     key_iterator(const unsigned char *const Ptr, offset_type NumEntries,
                  Info *InfoObj)
         : iterator_base(Ptr, NumEntries), InfoObj(InfoObj) {}
     key_iterator() : iterator_base(), InfoObj() {}

     key_iterator &operator++() {
       this->advance();
       return *this;
     }
     key_iterator operator++(int) { // Postincrement
       key_iterator tmp = *this;
       ++*this;
       return tmp;
     }

     internal_key_type getInternalKey() const {
       auto *LocalPtr = this->getItem();

       // Determine the length of the key and the data.
       auto L = Info::ReadKeyDataLength(LocalPtr);

       // Read the key.
       return InfoObj->ReadKey(LocalPtr, L.first);
     }

     value_type operator*() const {
       return InfoObj->GetExternalKey(getInternalKey());
     }
   };

   key_iterator key_begin() {
     return key_iterator(Payload, this->getNumEntries(), &this->getInfoObj());
   }
   key_iterator key_end() { return key_iterator(); }

   iterator_range<key_iterator> keys() {
     return make_range(key_begin(), key_end());
   }

   /// Iterates over all the entries in the table, returning the data.
   class data_iterator : public iterator_base {
     Info *InfoObj;

   public:
     typedef data_type value_type;

     data_iterator(const unsigned char *const Ptr, offset_type NumEntries,
                   Info *InfoObj)
         : iterator_base(Ptr, NumEntries), InfoObj(InfoObj) {}
     data_iterator() : iterator_base(), InfoObj() {}

     data_iterator &operator++() { // Preincrement
       this->advance();
       return *this;
     }
     data_iterator operator++(int) { // Postincrement
       data_iterator tmp = *this;
       ++*this;
       return tmp;
     }

     value_type operator*() const {
       auto *LocalPtr = this->getItem();

       // Determine the length of the key and the data.
       auto L = Info::ReadKeyDataLength(LocalPtr);

       // Read the key.
       const internal_key_type &Key = InfoObj->ReadKey(LocalPtr, L.first);
       return InfoObj->ReadData(Key, LocalPtr + L.first, L.second);
     }
   };

   data_iterator data_begin() {
     return data_iterator(Payload, this->getNumEntries(), &this->getInfoObj());
   }
   data_iterator data_end() { return data_iterator(); }

   iterator_range<data_iterator> data() {
     return make_range(data_begin(), data_end());
   }

   /// Create the hash table.
   ///
   /// \param Buckets is the beginning of the hash table itself, which follows
   /// the payload of entire structure. This is the value returned by
   /// OnDiskHashTableGenerator::Emit.
   ///
   /// \param Payload is the beginning of the data contained in the table.  This
   /// is Base plus any padding or header data that was stored, ie, the offset
   /// that the stream was at when calling Emit.
   ///
   /// \param Base is the point from which all offsets into the structure are
   /// based. This is offset 0 in the stream that was used when Emitting the
   /// table.
   static OnDiskIterableChainedHashTable *
   Create(const unsigned char *Buckets, const unsigned char *const Payload,
          const unsigned char *const Base, const Info &InfoObj = Info()) {
     assert(Buckets > Base);
     auto NumBucketsAndEntries =
         OnDiskIterableChainedHashTable<Info>::readNumBucketsAndEntries(Buckets);
     return new OnDiskIterableChainedHashTable<Info>(
         NumBucketsAndEntries.first, NumBucketsAndEntries.second,
         Buckets, Payload, Base, InfoObj);
   }
 };

 } // end namespace llvm

 #endif
	//===--- OnDiskHashTable.h - On-Disk Hash Table Implementation --- C++ --===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//
	///
	/// \file
	/// Defines facilities for reading and writing on-disk hash tables.
	///
	//===----------------------------------------------------------------------===//
	#ifndef LLVM_SUPPORT_ONDISKHASHTABLE_H
	#define LLVM_SUPPORT_ONDISKHASHTABLE_H

	#include "llvm/Support/Allocator.h"
	#include "llvm/Support/DataTypes.h"
	#include "llvm/Support/EndianStream.h"
	#include "llvm/Support/Host.h"
	#include "llvm/Support/MathExtras.h"
	#include "llvm/Support/raw_ostream.h"
	#include <cassert>
	#include <cstdlib>

	namespace llvm {

	/// Generates an on disk hash table.
	///
	/// This needs an \c Info that handles storing values into the hash table's
	/// payload and computes the hash for a given key. This should provide the
	/// following interface:
	///
	/// \code
	/// class ExampleInfo {
	/// public:
	/// typedef ExampleKey key_type; // Must be copy constructible
	/// typedef ExampleKey &key_type_ref;
	/// typedef ExampleData data_type; // Must be copy constructible
	/// typedef ExampleData &data_type_ref;
	/// typedef uint32_t hash_value_type; // The type the hash function returns.
	/// typedef uint32_t offset_type; // The type for offsets into the table.
	///
	/// /// Calculate the hash for Key
	/// static hash_value_type ComputeHash(key_type_ref Key);
	/// /// Return the lengths, in bytes, of the given Key/Data pair.
	/// static std::pair<offset_type, offset_type>
	/// EmitKeyDataLength(raw_ostream &Out, key_type_ref Key, data_type_ref Data);
	/// /// Write Key to Out. KeyLen is the length from EmitKeyDataLength.
	/// static void EmitKey(raw_ostream &Out, key_type_ref Key,
	/// offset_type KeyLen);
	/// /// Write Data to Out. DataLen is the length from EmitKeyDataLength.
	/// static void EmitData(raw_ostream &Out, key_type_ref Key,
	/// data_type_ref Data, offset_type DataLen);
	/// /// Determine if two keys are equal. Optional, only needed by contains.
	/// static bool EqualKey(key_type_ref Key1, key_type_ref Key2);
	/// };
	/// \endcode
	template <typename Info> class OnDiskChainedHashTableGenerator {
	/// A single item in the hash table.
	class Item {
	public:
	typename Info::key_type Key;
	typename Info::data_type Data;
	Item *Next;
	const typename Info::hash_value_type Hash;

	Item(typename Info::key_type_ref Key, typename Info::data_type_ref Data,
	Info &InfoObj)
	: Key(Key), Data(Data), Next(nullptr), Hash(InfoObj.ComputeHash(Key)) {}
	};

	typedef typename Info::offset_type offset_type;
	offset_type NumBuckets;
	offset_type NumEntries;
	llvm::SpecificBumpPtrAllocator<Item> BA;

	/// A linked list of values in a particular hash bucket.
	struct Bucket {
	offset_type Off;
	unsigned Length;
	Item *Head;
	};

	Bucket *Buckets;

	private:
	/// Insert an item into the appropriate hash bucket.
	void insert(Bucket Buckets, size_t Size, Item E) {
	Bucket &B = Buckets[E->Hash & (Size - 1)];
	E->Next = B.Head;
	++B.Length;
	B.Head = E;
	}

	/// Resize the hash table, moving the old entries into the new buckets.
	void resize(size_t NewSize) {
	Bucket NewBuckets = static_cast<Bucket >(
	safe_calloc(NewSize, sizeof(Bucket)));
	// Populate NewBuckets with the old entries.
	for (size_t I = 0; I < NumBuckets; ++I)
	for (Item *E = Buckets[I].Head; E;) {
	Item *N = E->Next;
	E->Next = nullptr;
	insert(NewBuckets, NewSize, E);
	E = N;
	}

	free(Buckets);
	NumBuckets = NewSize;
	Buckets = NewBuckets;
	}

	public:
	/// Insert an entry into the table.
	void insert(typename Info::key_type_ref Key,
	typename Info::data_type_ref Data) {
	Info InfoObj;
	insert(Key, Data, InfoObj);
	}

	/// Insert an entry into the table.
	///
	/// Uses the provided Info instead of a stack allocated one.
	void insert(typename Info::key_type_ref Key,
	typename Info::data_type_ref Data, Info &InfoObj) {
	++NumEntries;
	if (4 * NumEntries >= 3 * NumBuckets)
	resize(NumBuckets * 2);
	insert(Buckets, NumBuckets, new (BA.Allocate()) Item(Key, Data, InfoObj));
	}

	/// Determine whether an entry has been inserted.
	bool contains(typename Info::key_type_ref Key, Info &InfoObj) {
	unsigned Hash = InfoObj.ComputeHash(Key);
	for (Item *I = Buckets[Hash & (NumBuckets - 1)].Head; I; I = I->Next)
	if (I->Hash == Hash && InfoObj.EqualKey(I->Key, Key))
	return true;
	return false;
	}

	/// Emit the table to Out, which must not be at offset 0.
	offset_type Emit(raw_ostream &Out) {
	Info InfoObj;
	return Emit(Out, InfoObj);
	}

	/// Emit the table to Out, which must not be at offset 0.
	///
	/// Uses the provided Info instead of a stack allocated one.
	offset_type Emit(raw_ostream &Out, Info &InfoObj) {
	using namespace llvm::support;
	endian::Writer LE(Out, little);

	// Now we're done adding entries, resize the bucket list if it's
	// significantly too large. (This only happens if the number of
	// entries is small and we're within our initial allocation of
	// 64 buckets.) We aim for an occupancy ratio in [3/8, 3/4).
	//
	// As a special case, if there are two or fewer entries, just
	// form a single bucket. A linear scan is fine in that case, and
	// this is very common in C++ class lookup tables. This also
	// guarantees we produce at least one bucket for an empty table.
	//
	// FIXME: Try computing a perfect hash function at this point.
	unsigned TargetNumBuckets =
	NumEntries <= 2 ? 1 : NextPowerOf2(NumEntries * 4 / 3);
	if (TargetNumBuckets != NumBuckets)
	resize(TargetNumBuckets);

	// Emit the payload of the table.
	for (offset_type I = 0; I < NumBuckets; ++I) {
	Bucket &B = Buckets[I];
	if (!B.Head)
	continue;

	// Store the offset for the data of this bucket.
	B.Off = Out.tell();
	assert(B.Off && "Cannot write a bucket at offset 0. Please add padding.");

	// Write out the number of items in the bucket.
	LE.write<uint16_t>(B.Length);
	assert(B.Length != 0 && "Bucket has a head but zero length?");

	// Write out the entries in the bucket.
	for (Item *I = B.Head; I; I = I->Next) {
	LE.write<typename Info::hash_value_type>(I->Hash);
	const std::pair<offset_type, offset_type> &Len =
	InfoObj.EmitKeyDataLength(Out, I->Key, I->Data);
	#ifdef NDEBUG
	InfoObj.EmitKey(Out, I->Key, Len.first);
	InfoObj.EmitData(Out, I->Key, I->Data, Len.second);
	#else
	// In asserts mode, check that the users length matches the data they
	// wrote.
	uint64_t KeyStart = Out.tell();
	InfoObj.EmitKey(Out, I->Key, Len.first);
	uint64_t DataStart = Out.tell();
	InfoObj.EmitData(Out, I->Key, I->Data, Len.second);
	uint64_t End = Out.tell();
	assert(offset_type(DataStart - KeyStart) == Len.first &&
	"key length does not match bytes written");
	assert(offset_type(End - DataStart) == Len.second &&
	"data length does not match bytes written");
	#endif
	}
	}

	// Pad with zeros so that we can start the hashtable at an aligned address.
	offset_type TableOff = Out.tell();
	uint64_t N = llvm::OffsetToAlignment(TableOff, alignof(offset_type));
	TableOff += N;
	while (N--)
	LE.write<uint8_t>(0);

	// Emit the hashtable itself.
	LE.write<offset_type>(NumBuckets);
	LE.write<offset_type>(NumEntries);
	for (offset_type I = 0; I < NumBuckets; ++I)
	LE.write<offset_type>(Buckets[I].Off);

	return TableOff;
	}

	OnDiskChainedHashTableGenerator() {
	NumEntries = 0;
	NumBuckets = 64;
	// Note that we do not need to run the constructors of the individual
	// Bucket objects since 'calloc' returns bytes that are all 0.
	Buckets = static_cast<Bucket *>(safe_calloc(NumBuckets, sizeof(Bucket)));
	}

	~OnDiskChainedHashTableGenerator() { std::free(Buckets); }
	};

	/// Provides lookup on an on disk hash table.
	///
	/// This needs an \c Info that handles reading values from the hash table's
	/// payload and computes the hash for a given key. This should provide the
	/// following interface:
	///
	/// \code
	/// class ExampleLookupInfo {
	/// public:
	/// typedef ExampleData data_type;
	/// typedef ExampleInternalKey internal_key_type; // The stored key type.
	/// typedef ExampleKey external_key_type; // The type to pass to find().
	/// typedef uint32_t hash_value_type; // The type the hash function returns.
	/// typedef uint32_t offset_type; // The type for offsets into the table.
	///
	/// /// Compare two keys for equality.
	/// static bool EqualKey(internal_key_type &Key1, internal_key_type &Key2);
	/// /// Calculate the hash for the given key.
	/// static hash_value_type ComputeHash(internal_key_type &IKey);
	/// /// Translate from the semantic type of a key in the hash table to the
	/// /// type that is actually stored and used for hashing and comparisons.
	/// /// The internal and external types are often the same, in which case this
	/// /// can simply return the passed in value.
	/// static const internal_key_type &GetInternalKey(external_key_type &EKey);
	/// /// Read the key and data length from Buffer, leaving it pointing at the
	/// /// following byte.
	/// static std::pair<offset_type, offset_type>
	/// ReadKeyDataLength(const unsigned char *&Buffer);
	/// /// Read the key from Buffer, given the KeyLen as reported from
	/// /// ReadKeyDataLength.
	/// const internal_key_type &ReadKey(const unsigned char *Buffer,
	/// offset_type KeyLen);
	/// /// Read the data for Key from Buffer, given the DataLen as reported from
	/// /// ReadKeyDataLength.
	/// data_type ReadData(StringRef Key, const unsigned char *Buffer,
	/// offset_type DataLen);
	/// };
	/// \endcode
	template <typename Info> class OnDiskChainedHashTable {
	const typename Info::offset_type NumBuckets;
	const typename Info::offset_type NumEntries;
	const unsigned char *const Buckets;
	const unsigned char *const Base;
	Info InfoObj;

	public:
	typedef Info InfoType;
	typedef typename Info::internal_key_type internal_key_type;
	typedef typename Info::external_key_type external_key_type;
	typedef typename Info::data_type data_type;
	typedef typename Info::hash_value_type hash_value_type;
	typedef typename Info::offset_type offset_type;

	OnDiskChainedHashTable(offset_type NumBuckets, offset_type NumEntries,
	const unsigned char *Buckets,
	const unsigned char *Base,
	const Info &InfoObj = Info())
	: NumBuckets(NumBuckets), NumEntries(NumEntries), Buckets(Buckets),
	Base(Base), InfoObj(InfoObj) {
	assert((reinterpret_cast<uintptr_t>(Buckets) & 0x3) == 0 &&
	"'buckets' must have a 4-byte alignment");
	}

	/// Read the number of buckets and the number of entries from a hash table
	/// produced by OnDiskHashTableGenerator::Emit, and advance the Buckets
	/// pointer past them.
	static std::pair<offset_type, offset_type>
	readNumBucketsAndEntries(const unsigned char *&Buckets) {
	assert((reinterpret_cast<uintptr_t>(Buckets) & 0x3) == 0 &&
	"buckets should be 4-byte aligned.");
	using namespace llvm::support;
	offset_type NumBuckets =
	endian::readNext<offset_type, little, aligned>(Buckets);
	offset_type NumEntries =
	endian::readNext<offset_type, little, aligned>(Buckets);
	return std::make_pair(NumBuckets, NumEntries);
	}

	offset_type getNumBuckets() const { return NumBuckets; }
	offset_type getNumEntries() const { return NumEntries; }
	const unsigned char *getBase() const { return Base; }
	const unsigned char *getBuckets() const { return Buckets; }

	bool isEmpty() const { return NumEntries == 0; }

	class iterator {
	internal_key_type Key;
	const unsigned char *const Data;
	const offset_type Len;
	Info *InfoObj;

	public:
	iterator() : Key(), Data(nullptr), Len(0), InfoObj(nullptr) {}
	iterator(const internal_key_type K, const unsigned char *D, offset_type L,
	Info *InfoObj)
	: Key(K), Data(D), Len(L), InfoObj(InfoObj) {}

	data_type operator*() const { return InfoObj->ReadData(Key, Data, Len); }

	const unsigned char *getDataPtr() const { return Data; }
	offset_type getDataLen() const { return Len; }

	bool operator==(const iterator &X) const { return X.Data == Data; }
	bool operator!=(const iterator &X) const { return X.Data != Data; }
	};

	/// Look up the stored data for a particular key.
	iterator find(const external_key_type &EKey, Info *InfoPtr = nullptr) {
	const internal_key_type &IKey = InfoObj.GetInternalKey(EKey);
	hash_value_type KeyHash = InfoObj.ComputeHash(IKey);
	return find_hashed(IKey, KeyHash, InfoPtr);
	}

	/// Look up the stored data for a particular key with a known hash.
	iterator find_hashed(const internal_key_type &IKey, hash_value_type KeyHash,
	Info *InfoPtr = nullptr) {
	using namespace llvm::support;

	if (!InfoPtr)
	InfoPtr = &InfoObj;

	// Each bucket is just an offset into the hash table file.
	offset_type Idx = KeyHash & (NumBuckets - 1);
	const unsigned char Bucket = Buckets + sizeof(offset_type) Idx;

	offset_type Offset = endian::readNext<offset_type, little, aligned>(Bucket);
	if (Offset == 0)
	return iterator(); // Empty bucket.
	const unsigned char *Items = Base + Offset;

	// 'Items' starts with a 16-bit unsigned integer representing the
	// number of items in this bucket.
	unsigned Len = endian::readNext<uint16_t, little, unaligned>(Items);

	for (unsigned i = 0; i < Len; ++i) {
	// Read the hash.
	hash_value_type ItemHash =
	endian::readNext<hash_value_type, little, unaligned>(Items);

	// Determine the length of the key and the data.
	const std::pair<offset_type, offset_type> &L =
	Info::ReadKeyDataLength(Items);
	offset_type ItemLen = L.first + L.second;

	// Compare the hashes. If they are not the same, skip the entry entirely.
	if (ItemHash != KeyHash) {
	Items += ItemLen;
	continue;
	}

	// Read the key.
	const internal_key_type &X =
	InfoPtr->ReadKey((const unsigned char *const)Items, L.first);

	// If the key doesn't match just skip reading the value.
	if (!InfoPtr->EqualKey(X, IKey)) {
	Items += ItemLen;
	continue;
	}

	// The key matches!
	return iterator(X, Items + L.first, L.second, InfoPtr);
	}

	return iterator();
	}

	iterator end() const { return iterator(); }

	Info &getInfoObj() { return InfoObj; }

	/// Create the hash table.
	///
	/// \param Buckets is the beginning of the hash table itself, which follows
	/// the payload of entire structure. This is the value returned by
	/// OnDiskHashTableGenerator::Emit.
	///
	/// \param Base is the point from which all offsets into the structure are
	/// based. This is offset 0 in the stream that was used when Emitting the
	/// table.
	static OnDiskChainedHashTable Create(const unsigned char Buckets,
	const unsigned char *const Base,
	const Info &InfoObj = Info()) {
	assert(Buckets > Base);
	auto NumBucketsAndEntries = readNumBucketsAndEntries(Buckets);
	return new OnDiskChainedHashTable<Info>(NumBucketsAndEntries.first,
	NumBucketsAndEntries.second,
	Buckets, Base, InfoObj);
	}
	};

	/// Provides lookup and iteration over an on disk hash table.
	///
	/// \copydetails llvm::OnDiskChainedHashTable
	template <typename Info>
	class OnDiskIterableChainedHashTable : public OnDiskChainedHashTable<Info> {
	const unsigned char *Payload;

	public:
	typedef OnDiskChainedHashTable<Info> base_type;
	typedef typename base_type::internal_key_type internal_key_type;
	typedef typename base_type::external_key_type external_key_type;
	typedef typename base_type::data_type data_type;
	typedef typename base_type::hash_value_type hash_value_type;
	typedef typename base_type::offset_type offset_type;

	private:
	/// Iterates over all of the keys in the table.
	class iterator_base {
	const unsigned char *Ptr;
	offset_type NumItemsInBucketLeft;
	offset_type NumEntriesLeft;

	public:
	typedef external_key_type value_type;

	iterator_base(const unsigned char *const Ptr, offset_type NumEntries)
	: Ptr(Ptr), NumItemsInBucketLeft(0), NumEntriesLeft(NumEntries) {}
	iterator_base()
	: Ptr(nullptr), NumItemsInBucketLeft(0), NumEntriesLeft(0) {}

	friend bool operator==(const iterator_base &X, const iterator_base &Y) {
	return X.NumEntriesLeft == Y.NumEntriesLeft;
	}
	friend bool operator!=(const iterator_base &X, const iterator_base &Y) {
	return X.NumEntriesLeft != Y.NumEntriesLeft;
	}

	/// Move to the next item.
	void advance() {
	using namespace llvm::support;
	if (!NumItemsInBucketLeft) {
	// 'Items' starts with a 16-bit unsigned integer representing the
	// number of items in this bucket.
	NumItemsInBucketLeft =
	endian::readNext<uint16_t, little, unaligned>(Ptr);
	}
	Ptr += sizeof(hash_value_type); // Skip the hash.
	// Determine the length of the key and the data.
	const std::pair<offset_type, offset_type> &L =
	Info::ReadKeyDataLength(Ptr);
	Ptr += L.first + L.second;
	assert(NumItemsInBucketLeft);
	--NumItemsInBucketLeft;
	assert(NumEntriesLeft);
	--NumEntriesLeft;
	}

	/// Get the start of the item as written by the trait (after the hash and
	/// immediately before the key and value length).
	const unsigned char *getItem() const {
	return Ptr + (NumItemsInBucketLeft ? 0 : 2) + sizeof(hash_value_type);
	}
	};

	public:
	OnDiskIterableChainedHashTable(offset_type NumBuckets, offset_type NumEntries,
	const unsigned char *Buckets,
	const unsigned char *Payload,
	const unsigned char *Base,
	const Info &InfoObj = Info())
	: base_type(NumBuckets, NumEntries, Buckets, Base, InfoObj),
	Payload(Payload) {}

	/// Iterates over all of the keys in the table.
	class key_iterator : public iterator_base {
	Info *InfoObj;

	public:
	typedef external_key_type value_type;

	key_iterator(const unsigned char *const Ptr, offset_type NumEntries,
	Info *InfoObj)
	: iterator_base(Ptr, NumEntries), InfoObj(InfoObj) {}
	key_iterator() : iterator_base(), InfoObj() {}

	key_iterator &operator++() {
	this->advance();
	return *this;
	}
	key_iterator operator++(int) { // Postincrement
	key_iterator tmp = *this;
	++*this;
	return tmp;
	}

	internal_key_type getInternalKey() const {
	auto *LocalPtr = this->getItem();

	// Determine the length of the key and the data.
	auto L = Info::ReadKeyDataLength(LocalPtr);

	// Read the key.
	return InfoObj->ReadKey(LocalPtr, L.first);
	}

	value_type operator*() const {
	return InfoObj->GetExternalKey(getInternalKey());
	}
	};

	key_iterator key_begin() {
	return key_iterator(Payload, this->getNumEntries(), &this->getInfoObj());
	}
	key_iterator key_end() { return key_iterator(); }

	iterator_range<key_iterator> keys() {
	return make_range(key_begin(), key_end());
	}

	/// Iterates over all the entries in the table, returning the data.
	class data_iterator : public iterator_base {
	Info *InfoObj;

	public:
	typedef data_type value_type;

	data_iterator(const unsigned char *const Ptr, offset_type NumEntries,
	Info *InfoObj)
	: iterator_base(Ptr, NumEntries), InfoObj(InfoObj) {}
	data_iterator() : iterator_base(), InfoObj() {}

	data_iterator &operator++() { // Preincrement
	this->advance();
	return *this;
	}
	data_iterator operator++(int) { // Postincrement
	data_iterator tmp = *this;
	++*this;
	return tmp;
	}

	value_type operator*() const {
	auto *LocalPtr = this->getItem();

	// Determine the length of the key and the data.
	auto L = Info::ReadKeyDataLength(LocalPtr);

	// Read the key.
	const internal_key_type &Key = InfoObj->ReadKey(LocalPtr, L.first);
	return InfoObj->ReadData(Key, LocalPtr + L.first, L.second);
	}
	};

	data_iterator data_begin() {
	return data_iterator(Payload, this->getNumEntries(), &this->getInfoObj());
	}
	data_iterator data_end() { return data_iterator(); }

	iterator_range<data_iterator> data() {
	return make_range(data_begin(), data_end());
	}

	/// Create the hash table.
	///
	/// \param Buckets is the beginning of the hash table itself, which follows
	/// the payload of entire structure. This is the value returned by
	/// OnDiskHashTableGenerator::Emit.
	///
	/// \param Payload is the beginning of the data contained in the table. This
	/// is Base plus any padding or header data that was stored, ie, the offset
	/// that the stream was at when calling Emit.
	///
	/// \param Base is the point from which all offsets into the structure are
	/// based. This is offset 0 in the stream that was used when Emitting the
	/// table.
	static OnDiskIterableChainedHashTable *
	Create(const unsigned char Buckets, const unsigned char const Payload,
	const unsigned char *const Base, const Info &InfoObj = Info()) {
	assert(Buckets > Base);
	auto NumBucketsAndEntries =
	OnDiskIterableChainedHashTable<Info>::readNumBucketsAndEntries(Buckets);
	return new OnDiskIterableChainedHashTable<Info>(
	NumBucketsAndEntries.first, NumBucketsAndEntries.second,
	Buckets, Payload, Base, InfoObj);
	}
	};

	} // end namespace llvm

	#endif