blob: ee891cbc78e4c6914df20d4cd0db6864c1a4c279 [file] [log] [blame]
/* Copyright 2019 Google LLC. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "ruy/prepacked_cache.h"
#include "ruy/mat.h"
#include "ruy/profiler/instrumentation.h"
#include "ruy/system_aligned_alloc.h"
namespace ruy {
namespace {
// Allocates the `data` and `sums` buffers, and sets the corresponding
// pointer fields, in a PEMat whose other fields, particularly `layout`
// and the runtime data types, are already populated.
int AllocateBuffers(PEMat* packed_matrix) {
const int data_bytes = DataBytes(*packed_matrix);
packed_matrix->data = detail::SystemAlignedAlloc(data_bytes);
int sums_bytes = 0;
if (!packed_matrix->sums_type.is_floating_point) {
// Integer quantized matrices also need the `sums` buffer.
sums_bytes = SumsBytes(*packed_matrix);
packed_matrix->sums = detail::SystemAlignedAlloc(sums_bytes);
}
return data_bytes + sums_bytes;
}
// Frees the `data` and `sums` buffers held by a PEMat.
void FreeBuffers(const PEMat& packed_matrix) {
detail::SystemAlignedFree(packed_matrix.data);
detail::SystemAlignedFree(packed_matrix.sums);
}
} // end anonymous namespace
std::size_t PrepackedCache::KeyHash::operator()(
const PrepackedCache::Key& key) const {
std::size_t src_data_hash = reinterpret_cast<std::size_t>(key.src_data);
// Naive hash of the layout. Based on some heuristic reasoning, not any
// benchmarking.
// A choice of hash function here is just an optimization matter
// anyway, since a hash collision only results in some Key::operator== calls
// to disambiguate, and even just returning src_data_hash, ignoring the layout
// altogether, would probably be good enough, as the case of multiple entries
// with the same data pointer will be uncommon.
// Here we multiply-add the layout fields using some small constant prime
// numbers as multipliers. The conventional approach of xor-ing bit-rotations
// would result in some hash collisions because these values are typically
// small positive integers, so bit-rotations are essentially bit-shifts,
// and powers of two are common.
std::size_t packed_layout_hash =
static_cast<int>(key.packed_layout.order) +
static_cast<int>(key.packed_layout.kernel.order) * 2 +
key.packed_layout.stride * 3 + key.packed_layout.kernel.rows * 5 +
key.packed_layout.kernel.cols * 7 + key.packed_layout.rows * 11 +
key.packed_layout.cols * 13;
return src_data_hash ^ packed_layout_hash;
}
PrepackedCache::~PrepackedCache() {
for (const auto& pair : cache_) {
FreeBuffers(pair.second.packed_matrix);
}
}
PrepackedCache::Action PrepackedCache::Get(const void* src_data,
PEMat* packed_matrix) {
// Construct a Key and look up the cache.
Key key;
key.src_data = src_data;
key.packed_layout = packed_matrix->layout;
key.zero_point = packed_matrix->zero_point;
const auto& itr = cache_.find(key);
if (itr != cache_.end()) {
// Found existing entry. Update its timestamp and return it.
itr->second.timestamp = timestamp_++;
*packed_matrix = itr->second.packed_matrix;
return Action::kGotExistingEntry;
}
// No existing entry found. Allocate new buffers now and insert in the cache.
const int new_bytes = AllocateBuffers(packed_matrix);
EjectUntilRoomFor(new_bytes);
Entry entry{*packed_matrix, timestamp_++};
cache_.emplace(key, entry);
buffers_bytes_ += new_bytes;
return Action::kInsertedNewEntry;
}
void PrepackedCache::EjectUntilRoomFor(int new_bytes) {
profiler::ScopeLabel label("PrepackedCacheEjection");
// While we are above the threshold of ejection, eject the LRU entry.
while (!cache_.empty() && buffers_bytes_ + new_bytes > max_buffers_bytes_) {
EjectOne();
}
}
void PrepackedCache::EjectOne() {
auto oldest = cache_.begin();
Timestamp oldest_timestamp = oldest->second.timestamp;
{
for (auto itr = cache_.begin(); itr != cache_.end(); ++itr) {
if (itr->second.timestamp < oldest_timestamp) {
oldest = itr;
oldest_timestamp = itr->second.timestamp;
}
}
}
const PEMat& packed_matrix = oldest->second.packed_matrix;
buffers_bytes_ -= DataBytes(packed_matrix) + SumsBytes(packed_matrix);
FreeBuffers(packed_matrix);
cache_.erase(oldest);
}
} // namespace ruy