| /* |
| * Copyright (C) 2017 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #pragma once |
| |
| #include <vector> |
| #include <cstdint> |
| #include <memory> |
| |
| namespace slicer { |
| |
| // A specialized Key -> T* map (note that, unlike std:: containers, the values |
| // are always pointers here, and we don't explicitly store the lookup keys) |
| // |
| // Implemented as an incrementally resizable hash table: we split the logical hash table |
| // into two internal fixed size tables, the "full table" and a "insertion table". |
| // When the insertion table overflows, we allocate a larger hashtable to replace |
| // it and "insertion table" becomes the "full table" (the old "full table" is |
| // rehashed into the new hash table) |
| // |
| // Similar to open addressing hash tables, all the buckets are a single, |
| // contiguous array. But this table is growing and the collisions are still handled |
| // as chains (using indexes instead of pointers). |
| // |
| // The result is faster than std::unordered_map and uses ~25% of |
| // the memory used by std::unordered_map<const char*, String*> |
| // |
| // The Hash template argument is a type which must implement: |
| // 1. hash function : uint32_t Hash(const Key& key) |
| // 2. key compare : bool Compare(const Key& key, T* value) |
| // 3. key extraction : Key GetKey(T* value) |
| // 4. copy semantics |
| // |
| template<class Key, class T, class Hash> |
| class HashTable { |
| private: |
| // the index type inside the bucket array |
| using Index = uint32_t; |
| |
| static constexpr Index kInitialHashBuckets = (1 << 7) - 1; |
| static constexpr Index kAvgChainLength = 2; |
| static constexpr Index kInvalidIndex = static_cast<Index>(-1); |
| static constexpr double kResizeFactor = 1.6; |
| |
| struct __attribute__((packed)) Bucket { |
| T* value = nullptr; |
| Index next = kInvalidIndex; |
| }; |
| |
| class Partition { |
| public: |
| Partition(Index size, const Hash& hasher); |
| bool Insert(T* value); |
| T* Lookup(const Key& key, uint32_t hash_value) const; |
| Index HashBuckets() const { return hash_buckets_; } |
| void InsertAll(const Partition& src); |
| void PrintStats(const char* name, bool verbose); |
| |
| private: |
| std::vector<Bucket> buckets_; |
| const Index hash_buckets_; |
| Hash hasher_; |
| }; |
| |
| public: |
| explicit HashTable(const Hash& hasher = Hash()) : hasher_(hasher) { |
| // we start with full_table_ == nullptr |
| insertion_table_.reset(new Partition(kInitialHashBuckets, hasher_)); |
| } |
| |
| ~HashTable() = default; |
| |
| // No move or copy semantics |
| HashTable(const HashTable&) = delete; |
| HashTable& operator=(const HashTable&) = delete; |
| |
| // Insert a new, non-nullptr T* into the hash table |
| // (we only store unique values so the new value must |
| // not be in the table already) |
| void Insert(T* value); |
| |
| // Lookup an existing value |
| // (returns nullptr if the value is not found) |
| T* Lookup(const Key& key) const; |
| |
| void PrintStats(const char* name, bool verbose); |
| |
| private: |
| std::unique_ptr<Partition> full_table_; |
| std::unique_ptr<Partition> insertion_table_; |
| Hash hasher_; |
| }; |
| |
| template<class Key, class T, class Hash> |
| HashTable<Key, T, Hash>::Partition::Partition(Index size, const Hash& hasher) |
| : hash_buckets_(size), hasher_(hasher) { |
| // allocate space for the hash buckets + avg chain length |
| buckets_.reserve(hash_buckets_ * kAvgChainLength); |
| buckets_.resize(hash_buckets_); |
| } |
| |
| // Similar to the "cellar" version of coalesced hashing, |
| // the buckets array is divided into a fixed set of entries |
| // addressable by the hash value [0 .. hash_buckets_) and |
| // extra buckets for the collision chains [hash_buckets_, buckets_.size()) |
| // Unlike coalesced hashing, our "cellar" is growing so we don't actually |
| // have to coalesce any chains. |
| // |
| // Returns true if the insertion succeeded, false if the table overflows |
| // (we never insert more than the pre-reserved capacity) |
| // |
| template<class Key, class T, class Hash> |
| bool HashTable<Key, T, Hash>::Partition::Insert(T* value) { |
| SLICER_CHECK(value != nullptr); |
| // overflow? |
| if (buckets_.size() + 1 > buckets_.capacity()) { |
| return false; |
| } |
| auto key = hasher_.GetKey(value); |
| Index bucket_index = hasher_.Hash(key) % hash_buckets_; |
| if (buckets_[bucket_index].value == nullptr) { |
| buckets_[bucket_index].value = value; |
| } else { |
| Bucket new_bucket = {}; |
| new_bucket.value = value; |
| new_bucket.next = buckets_[bucket_index].next; |
| buckets_[bucket_index].next = buckets_.size(); |
| buckets_.push_back(new_bucket); |
| } |
| return true; |
| } |
| |
| template<class Key, class T, class Hash> |
| T* HashTable<Key, T, Hash>::Partition::Lookup(const Key& key, uint32_t hash_value) const { |
| assert(hash_value == hasher_.Hash(key)); |
| Index bucket_index = hash_value % hash_buckets_; |
| for (Index index = bucket_index; index != kInvalidIndex; index = buckets_[index].next) { |
| auto value = buckets_[index].value; |
| if (value == nullptr) { |
| assert(index < hash_buckets_); |
| break; |
| } else if (hasher_.Compare(key, value)) { |
| return value; |
| } |
| } |
| return nullptr; |
| } |
| |
| template<class Key, class T, class Hash> |
| void HashTable<Key, T, Hash>::Partition::InsertAll(const Partition& src) { |
| for (const auto& bucket : src.buckets_) { |
| if (bucket.value != nullptr) { |
| SLICER_CHECK(Insert(bucket.value)); |
| } |
| } |
| } |
| |
| // Try to insert into the "insertion table". If that overflows, |
| // we allocate a new, larger hash table, move "full table" value to it |
| // and "insertion table" becomes the new "full table". |
| template<class Key, class T, class Hash> |
| void HashTable<Key, T, Hash>::Insert(T* value) { |
| assert(Lookup(hasher_.GetKey(value)) == nullptr); |
| if (!insertion_table_->Insert(value)) { |
| std::unique_ptr<Partition> new_hash_table( |
| new Partition(insertion_table_->HashBuckets() * kResizeFactor, hasher_)); |
| if (full_table_) { |
| new_hash_table->InsertAll(*full_table_); |
| } |
| SLICER_CHECK(new_hash_table->Insert(value)); |
| full_table_ = std::move(insertion_table_); |
| insertion_table_ = std::move(new_hash_table); |
| } |
| } |
| |
| // First look into the "full table" and if the value is |
| // not found there look into the "insertion table" next |
| template<class Key, class T, class Hash> |
| T* HashTable<Key, T, Hash>::Lookup(const Key& key) const { |
| auto hash_value = hasher_.Hash(key); |
| if (full_table_) { |
| auto value = full_table_->Lookup(key, hash_value); |
| if (value != nullptr) { |
| return value; |
| } |
| } |
| return insertion_table_->Lookup(key, hash_value); |
| } |
| |
| template<class Key, class T, class Hash> |
| void HashTable<Key, T, Hash>::Partition::PrintStats(const char* name, bool verbose) { |
| int max_chain_length = 0; |
| int sum_chain_length = 0; |
| int used_buckets = 0; |
| for (Index i = 0; i < hash_buckets_; ++i) { |
| if (verbose) printf("%4d : ", i); |
| if (buckets_[i].value != nullptr) { |
| ++used_buckets; |
| int chain_length = 0; |
| for (Index ci = i; buckets_[ci].next != kInvalidIndex; ci = buckets_[ci].next) { |
| SLICER_CHECK(buckets_[ci].value != nullptr); |
| ++chain_length; |
| if (verbose) printf("*"); |
| } |
| max_chain_length = std::max(max_chain_length, chain_length); |
| sum_chain_length += chain_length; |
| } |
| if (verbose) printf("\n"); |
| } |
| |
| int avg_chain_length = used_buckets ? sum_chain_length / used_buckets : 0; |
| |
| printf("\nHash table partition (%s):\n", name); |
| printf(" hash_buckets : %u\n", hash_buckets_); |
| printf(" size/capacity : %zu / %zu\n", buckets_.size(), buckets_.capacity()); |
| printf(" used_buckets : %d\n", used_buckets); |
| printf(" max_chain_length : %d\n", max_chain_length); |
| printf(" avg_chain_length : %d\n", avg_chain_length); |
| } |
| |
| template<class Key, class T, class Hash> |
| void HashTable<Key, T, Hash>::PrintStats(const char* name, bool verbose) { |
| printf("\nHash table stats (%s)\n", name); |
| if (full_table_) { |
| full_table_->PrintStats("full_table", verbose); |
| } |
| insertion_table_->PrintStats("insertion_table", verbose); |
| } |
| |
| } // namespace slicer |