/*
* Copyright (C) 2017 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include "common.h"  // project header assumed to define SLICER_CHECK

#include <algorithm>  // std::max
#include <cassert>
#include <cstdint>
#include <cstdio>     // printf
#include <memory>
#include <vector>

namespace slicer {

// A specialized Key -> T* map (note that, unlike the std:: containers,
// the values are always pointers here, and we don't explicitly store
// the lookup keys)
//
// Implemented as an incrementally resizable hash table: the logical hash
// table is split into two internal fixed-size tables, the "full table" and
// an "insertion table". When the insertion table overflows, we allocate a
// larger partition to replace it: the old "full table" is rehashed into the
// new partition, which becomes the new insertion table, while the old
// insertion table becomes the "full table".
//
// Similar to open addressing hash tables, all the buckets live in a single,
// contiguous array, but the table can grow incrementally and collisions are
// still handled as chains (using indexes instead of pointers).
//
// The result is faster than std::unordered_map and uses roughly 25% of
// the memory used by a std::unordered_map<const char*, String*>
//
// The Hash template argument is a policy type which must implement:
//  1. hash function   : uint32_t Hash(const Key& key)
//  2. key compare     : bool Compare(const Key& key, T* value)
//  3. key extraction  : Key GetKey(T* value)
//  4. copy semantics
// (a sketch of such a policy follows below)
//
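// For illustration only, a minimal sketch of such a policy: it assumes a
// hypothetical String value type which exposes its key through a c_str()
// accessor (neither CStrHash nor String is part of this header):
//
//   struct CStrHash {
//     uint32_t Hash(const char* key) const {
//       uint32_t h = 2166136261u;  // 32-bit FNV-1a
//       while (*key) h = (h ^ uint8_t(*key++)) * 16777619u;
//       return h;
//     }
//     bool Compare(const char* key, String* value) const {
//       return std::strcmp(key, value->c_str()) == 0;
//     }
//     const char* GetKey(String* value) const { return value->c_str(); }
//   };
//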
template<class Key, class T, class Hash>
class HashTable {
 private:
  // the index type inside the bucket array
  using Index = uint32_t;

  static constexpr Index kInitialHashBuckets = (1 << 7) - 1;
  static constexpr Index kAvgChainLength = 2;
  static constexpr Index kInvalidIndex = static_cast<Index>(-1);
  static constexpr double kResizeFactor = 1.6;

  // packed to avoid padding: 12 bytes instead of 16 on typical 64-bit targets
  struct __attribute__((packed)) Bucket {
    T* value = nullptr;
    Index next = kInvalidIndex;
  };

  class Partition {
   public:
    Partition(Index size, const Hash& hasher);
    bool Insert(T* value);
    T* Lookup(const Key& key, uint32_t hash_value) const;
    Index HashBuckets() const { return hash_buckets_; }
    void InsertAll(const Partition& src);
    void PrintStats(const char* name, bool verbose);

   private:
    std::vector<Bucket> buckets_;
    const Index hash_buckets_;
    Hash hasher_;
  };

 public:
  explicit HashTable(const Hash& hasher = Hash()) : hasher_(hasher) {
    // we start with full_table_ == nullptr
    insertion_table_.reset(new Partition(kInitialHashBuckets, hasher_));
  }

  ~HashTable() = default;

  // No move or copy semantics
  HashTable(const HashTable&) = delete;
  HashTable& operator=(const HashTable&) = delete;

  // Insert a new, non-nullptr T* into the hash table
  // (we only store unique values, so the new value must
  //  not be in the table already)
  void Insert(T* value);

  // Lookup an existing value
  // (returns nullptr if the value is not found)
  T* Lookup(const Key& key) const;

  void PrintStats(const char* name, bool verbose);

 private:
  std::unique_ptr<Partition> full_table_;
  std::unique_ptr<Partition> insertion_table_;
  Hash hasher_;
};
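
// Example usage (building on the hypothetical CStrHash policy sketched above):
//
//   slicer::HashTable<const char*, String, CStrHash> table;
//   table.Insert(str);                    // str must not be in the table yet
//   String* found = table.Lookup("foo");  // nullptr if "foo" is not found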

template<class Key, class T, class Hash>
HashTable<Key, T, Hash>::Partition::Partition(Index size, const Hash& hasher)
    : hash_buckets_(size), hasher_(hasher) {
  // reserve space for the home buckets plus the expected collision-chain
  // buckets (hash_buckets_ * kAvgChainLength entries in total)
  buckets_.reserve(hash_buckets_ * kAvgChainLength);
  buckets_.resize(hash_buckets_);
}

// Similar to the "cellar" version of coalesced hashing, the buckets array is
// divided into a fixed set of "home" buckets directly addressable by the
// hash value, [0 .. hash_buckets_), and extra buckets for the collision
// chains, [hash_buckets_ .. buckets_.size()). Unlike coalesced hashing, our
// "cellar" grows on demand, so we never actually have to coalesce any chains.
//
// Returns true if the insertion succeeded, false if the table overflows
// (we never insert more than the pre-reserved capacity)
//
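// For example (a hypothetical scenario, with hash_buckets_ == 127), inserting
// three values v1, v2, v3 which all hash to home bucket 5 threads a chain
// through the overflow area (each new entry is linked in right after the
// home bucket):
//
//   buckets_[5]   = { value: v1, next: 128 }
//   buckets_[128] = { value: v3, next: 127 }
//   buckets_[127] = { value: v2, next: kInvalidIndex }
//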
template<class Key, class T, class Hash>
bool HashTable<Key, T, Hash>::Partition::Insert(T* value) {
  SLICER_CHECK(value != nullptr);
  // overflow? (each partition has a fixed, pre-reserved capacity;
  // the owning HashTable handles overflow by allocating a larger partition)
  if (buckets_.size() + 1 > buckets_.capacity()) {
    return false;
  }
  auto key = hasher_.GetKey(value);
  Index bucket_index = hasher_.Hash(key) % hash_buckets_;
  if (buckets_[bucket_index].value == nullptr) {
    // the home bucket is empty: store the value directly
    buckets_[bucket_index].value = value;
  } else {
    // collision: link a new chain bucket right after the home bucket
    Bucket new_bucket = {};
    new_bucket.value = value;
    new_bucket.next = buckets_[bucket_index].next;
    buckets_[bucket_index].next = static_cast<Index>(buckets_.size());
    buckets_.push_back(new_bucket);
  }
  return true;
}

template<class Key, class T, class Hash>
T* HashTable<Key, T, Hash>::Partition::Lookup(const Key& key, uint32_t hash_value) const {
  assert(hash_value == hasher_.Hash(key));
  Index bucket_index = hash_value % hash_buckets_;
  // walk the home bucket and its collision chain
  for (Index index = bucket_index; index != kInvalidIndex; index = buckets_[index].next) {
    auto value = buckets_[index].value;
    if (value == nullptr) {
      // only a home bucket can be empty (chain buckets always hold a value)
      assert(index < hash_buckets_);
      break;
    } else if (hasher_.Compare(key, value)) {
      return value;
    }
  }
  return nullptr;
}

template<class Key, class T, class Hash>
void HashTable<Key, T, Hash>::Partition::InsertAll(const Partition& src) {
  for (const auto& bucket : src.buckets_) {
    if (bucket.value != nullptr) {
      SLICER_CHECK(Insert(bucket.value));
    }
  }
}

// Try to insert into the "insertion table". If that overflows, we allocate
// a new, larger partition and rehash the "full table" values into it: the
// new partition becomes the "insertion table" and the old "insertion table"
// becomes the new "full table".
template<class Key, class T, class Hash>
void HashTable<Key, T, Hash>::Insert(T* value) {
  // we only store unique values
  assert(Lookup(hasher_.GetKey(value)) == nullptr);
  if (!insertion_table_->Insert(value)) {
    // resize: the new, larger partition takes over as the insertion table
    std::unique_ptr<Partition> new_hash_table(
        new Partition(insertion_table_->HashBuckets() * kResizeFactor, hasher_));
    if (full_table_) {
      new_hash_table->InsertAll(*full_table_);
    }
    SLICER_CHECK(new_hash_table->Insert(value));
    full_table_ = std::move(insertion_table_);
    insertion_table_ = std::move(new_hash_table);
  }
}

// First look into the "full table"; if the value is not found there,
// try the "insertion table" next.
template<class Key, class T, class Hash>
T* HashTable<Key, T, Hash>::Lookup(const Key& key) const {
  auto hash_value = hasher_.Hash(key);
  if (full_table_) {
    auto value = full_table_->Lookup(key, hash_value);
    if (value != nullptr) {
      return value;
    }
  }
  return insertion_table_->Lookup(key, hash_value);
}

template<class Key, class T, class Hash>
void HashTable<Key, T, Hash>::Partition::PrintStats(const char* name, bool verbose) {
  int max_chain_length = 0;
  int sum_chain_length = 0;
  int used_buckets = 0;
  for (Index i = 0; i < hash_buckets_; ++i) {
    if (verbose) printf("%4u : ", i);
    if (buckets_[i].value != nullptr) {
      ++used_buckets;
      // count the chain links hanging off this home bucket
      int chain_length = 0;
      for (Index ci = i; buckets_[ci].next != kInvalidIndex; ci = buckets_[ci].next) {
        SLICER_CHECK(buckets_[ci].value != nullptr);
        ++chain_length;
        if (verbose) printf("*");
      }
      max_chain_length = std::max(max_chain_length, chain_length);
      sum_chain_length += chain_length;
    }
    if (verbose) printf("\n");
  }
  int avg_chain_length = used_buckets ? sum_chain_length / used_buckets : 0;
  printf("\nHash table partition (%s):\n", name);
  printf(" hash_buckets : %u\n", hash_buckets_);
  printf(" size/capacity : %zu / %zu\n", buckets_.size(), buckets_.capacity());
  printf(" used_buckets : %d\n", used_buckets);
  printf(" max_chain_length : %d\n", max_chain_length);
  printf(" avg_chain_length : %d\n", avg_chain_length);
}

template<class Key, class T, class Hash>
void HashTable<Key, T, Hash>::PrintStats(const char* name, bool verbose) {
  printf("\nHash table stats (%s)\n", name);
  if (full_table_) {
    full_table_->PrintStats("full_table", verbose);
  }
  insertion_table_->PrintStats("insertion_table", verbose);
}

} // namespace slicer