blob: 8245a5c52d22e965d3121d5d2cb8fa52ad85b2a0 [file] [log] [blame]
/* Copyright 2019 Google LLC. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "ruy/block_map.h"
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <limits>
#include <vector>
#include "ruy/cpu_cache_params.h"
#include "ruy/gtest_wrapper.h"
#include "ruy/platform.h"
#include "ruy/side_pair.h"
namespace ruy {
namespace {
#if RUY_PLATFORM_NEON_64
// Unless otherwise specified, these tests have been tuned on ARM Cortex-A55.
void MakeBlockMapTuningTest(int rows, int cols, int depth, int kernel_rows,
int kernel_cols, int lhs_scalar_size,
int rhs_scalar_size, int tentative_thread_count,
int expected_num_blocks_base_log2,
int expected_rectangularness_log2) {
// Plausible Cortex-A55 cache sizes.
CpuCacheParams cpu_cache_params;
cpu_cache_params.local_cache_size = 128 * 1024;
cpu_cache_params.last_level_cache_size = 1024 * 1024;
BlockMap block_map;
MakeBlockMap(rows, cols, depth, kernel_rows, kernel_cols, lhs_scalar_size,
rhs_scalar_size, tentative_thread_count, cpu_cache_params,
&block_map);
EXPECT_EQ(block_map.num_blocks_base_log2, expected_num_blocks_base_log2);
EXPECT_EQ(std::min(block_map.rectangularness_log2[Side::kLhs],
block_map.rectangularness_log2[Side::kRhs]),
0);
EXPECT_EQ(std::max(block_map.rectangularness_log2[Side::kLhs],
block_map.rectangularness_log2[Side::kRhs]),
expected_rectangularness_log2);
}
TEST(BlockMapTest, MakeBlockMapTuningTest8bitCubicShapesOneThreadNeonDotprod) {
MakeBlockMapTuningTest(32, 32, 32, 8, 8, 1, 1, /* tentative_thread_count */ 1,
/* expected_num_blocks_base_log2 */ 0,
/* expected_rectangularness_log2 */ 0);
MakeBlockMapTuningTest(48, 48, 48, 8, 8, 1, 1, /* tentative_thread_count */ 1,
/* expected_num_blocks_base_log2 */ 0,
/* expected_rectangularness_log2 */ 0);
MakeBlockMapTuningTest(64, 64, 64, 8, 8, 1, 1, /* tentative_thread_count */ 1,
/* expected_num_blocks_base_log2 */ 0,
/* expected_rectangularness_log2 */ 0);
MakeBlockMapTuningTest(96, 96, 96, 8, 8, 1, 1, /* tentative_thread_count */ 1,
/* expected_num_blocks_base_log2 */ 0,
/* expected_rectangularness_log2 */ 0);
MakeBlockMapTuningTest(128, 128, 128, 8, 8, 1, 1,
/* tentative_thread_count */ 1,
/* expected_num_blocks_base_log2 */ 0,
/* expected_rectangularness_log2 */ 0);
MakeBlockMapTuningTest(192, 192, 192, 8, 8, 1, 1,
/* tentative_thread_count */ 1,
/* expected_num_blocks_base_log2 */ 0,
/* expected_rectangularness_log2 */ 0);
MakeBlockMapTuningTest(256, 256, 256, 8, 8, 1, 1,
/* tentative_thread_count */ 1,
/* expected_num_blocks_base_log2 */ 1,
/* expected_rectangularness_log2 */ 0);
MakeBlockMapTuningTest(384, 384, 384, 8, 8, 1, 1,
/* tentative_thread_count */ 1,
/* expected_num_blocks_base_log2 */ 1,
/* expected_rectangularness_log2 */ 0);
}
TEST(BlockMapTest,
MakeBlockMapTuningTest8bitCubicShapesFourThreadsNeonDotprod) {
MakeBlockMapTuningTest(32, 32, 32, 8, 8, 1, 1, /* tentative_thread_count */ 4,
/* expected_num_blocks_base_log2 */ 1,
/* expected_rectangularness_log2 */ 0);
MakeBlockMapTuningTest(48, 48, 48, 8, 8, 1, 1, /* tentative_thread_count */ 4,
/* expected_num_blocks_base_log2 */ 1,
/* expected_rectangularness_log2 */ 0);
MakeBlockMapTuningTest(64, 64, 64, 8, 8, 1, 1, /* tentative_thread_count */ 4,
/* expected_num_blocks_base_log2 */ 1,
/* expected_rectangularness_log2 */ 0);
MakeBlockMapTuningTest(96, 96, 96, 8, 8, 1, 1, /* tentative_thread_count */ 4,
/* expected_num_blocks_base_log2 */ 1,
/* expected_rectangularness_log2 */ 0);
MakeBlockMapTuningTest(128, 128, 128, 8, 8, 1, 1,
/* tentative_thread_count */ 4,
/* expected_num_blocks_base_log2 */ 1,
/* expected_rectangularness_log2 */ 0);
MakeBlockMapTuningTest(192, 192, 192, 8, 8, 1, 1,
/* tentative_thread_count */ 4,
/* expected_num_blocks_base_log2 */ 1,
/* expected_rectangularness_log2 */ 0);
MakeBlockMapTuningTest(256, 256, 256, 8, 8, 1, 1,
/* tentative_thread_count */ 4,
/* expected_num_blocks_base_log2 */ 2,
/* expected_rectangularness_log2 */ 0);
MakeBlockMapTuningTest(384, 384, 384, 8, 8, 1, 1,
/* tentative_thread_count */ 4,
/* expected_num_blocks_base_log2 */ 2,
/* expected_rectangularness_log2 */ 0);
}
TEST(BlockMapTest, MakeBlockMapTuningTest32bit) {
MakeBlockMapTuningTest(256, 256, 256, 8, 8, 4, 4,
/* tentative_thread_count */ 4,
/* expected_num_blocks_base_log2 */ 3,
/* expected_rectangularness_log2 */ 0);
MakeBlockMapTuningTest(4096, 4096, 4096, 8, 8, 4, 4,
/* tentative_thread_count */ 4,
/* expected_num_blocks_base_log2 */ 7,
/* expected_rectangularness_log2 */ 0);
}
TEST(BlockMapTest, MakeBlockMapTuningTestRectangular) {
MakeBlockMapTuningTest(256, 16, 256, 8, 8, 1, 1,
/* tentative_thread_count */ 1,
/* expected_num_blocks_base_log2 */ 0,
/* expected_rectangularness_log2 */ 3);
MakeBlockMapTuningTest(24, 2400, 256, 8, 8, 1, 1,
/* tentative_thread_count */ 1,
/* expected_num_blocks_base_log2 */ 0,
/* expected_rectangularness_log2 */ 6);
}
#endif
int L1Distance(const SidePair<int>& a, const SidePair<int>& b) {
return std::abs(a[Side::kLhs] - b[Side::kLhs]) +
std::abs(a[Side::kRhs] - b[Side::kRhs]);
}
void GetBlockByIndexSquareTest(int num_blocks_base_log2,
BlockMapTraversalOrder traversal_order) {
// Arbitrary, does not affect this test. 3 is just a typical value.
constexpr int kKernelSizeLog2 = 3;
const int size_log2 = num_blocks_base_log2 + kKernelSizeLog2;
BlockMap block_map;
block_map.thread_count = 1;
block_map.traversal_order = traversal_order;
block_map.num_blocks_base_log2 = num_blocks_base_log2;
for (Side side : {Side::kLhs, Side::kRhs}) {
block_map.dims[side] = 1 << size_log2;
block_map.rectangularness_log2[side] = 0;
block_map.kernel_dims[side] = 1 << kKernelSizeLog2;
block_map.small_block_dims[side] = block_map.kernel_dims[side];
block_map.large_blocks[side] = 0;
}
const int num_blocks_per_side = 1 << num_blocks_base_log2;
const int num_blocks = num_blocks_per_side * num_blocks_per_side;
EXPECT_EQ(num_blocks, NumBlocks(block_map));
// Perform a full traversal of all blocks, as if computing a whole matrix
// multiplication.
//
// Used to record how many times each block was hit by the traversal.
std::vector<int> block_hit_counts(num_blocks);
// Here we guard an assumption that all traversal orders start at (0, 0).
SidePair<int> previous_block_coords(0, 0);
// Sum of L1 norm of the coordinate change at every step of the traversal.
std::int64_t total_l1_distance = 0;
// Number of jumps i.e. traversal steps with a L1 norm greater than 1.
int discontinuity_count = 0;
for (int block_index = 0; block_index < num_blocks; block_index++) {
SidePair<int> block_coords;
GetBlockByIndex(block_map, block_index, &block_coords);
++block_hit_counts[block_coords[Side::kLhs] +
num_blocks_per_side * block_coords[Side::kRhs]];
int distance = L1Distance(block_coords, previous_block_coords);
total_l1_distance += distance;
discontinuity_count += (distance > 1);
previous_block_coords = block_coords;
}
// Verify that each block was traversed exactly once.
for (int l = 0; l < num_blocks_per_side; l++) {
for (int r = 0; r < num_blocks_per_side; r++) {
EXPECT_EQ(block_hit_counts[l + num_blocks_per_side * r], 1);
}
}
// Verify that the discontinuity_count and total_l1_distance are as expected
// for the given traversal_order.
switch (traversal_order) {
case BlockMapTraversalOrder::kFractalHilbert:
// No discontinuity at all with this space-filling continuous curve!
EXPECT_EQ(discontinuity_count, 0);
// Therefore, total_l1_distance has to be the number of blocks minus one.
EXPECT_EQ(total_l1_distance, num_blocks - 1);
break;
case BlockMapTraversalOrder::kLinear:
EXPECT_EQ(discontinuity_count, num_blocks_per_side - 1);
EXPECT_EQ(total_l1_distance,
2 * num_blocks_per_side * (num_blocks_per_side - 1));
break;
case BlockMapTraversalOrder::kFractalZ:
EXPECT_EQ(discontinuity_count, num_blocks > 1 ? (num_blocks / 2 - 1) : 0);
EXPECT_EQ(total_l1_distance,
2 * num_blocks_per_side * (num_blocks_per_side - 1));
break;
case BlockMapTraversalOrder::kFractalU: {
if (num_blocks_base_log2 == 0) {
EXPECT_EQ(discontinuity_count, 0);
EXPECT_EQ(total_l1_distance, 0);
} else {
int expected_discontinuity_count = 0;
int expected_total_l1_distance = 3;
for (int i = 2; i <= num_blocks_base_log2; i++) {
expected_discontinuity_count = 4 * expected_discontinuity_count + 2;
expected_total_l1_distance =
4 * expected_total_l1_distance + (1 << (i + 1)) - 1;
}
EXPECT_EQ(discontinuity_count, expected_discontinuity_count);
EXPECT_EQ(total_l1_distance, expected_total_l1_distance);
}
break;
}
default:
abort();
}
}
TEST(BlockMapTest, GetBlockByIndexSquare) {
for (int num_blocks_base_log2 = 0; num_blocks_base_log2 <= 10;
num_blocks_base_log2++) {
for (BlockMapTraversalOrder traversal_order :
{BlockMapTraversalOrder::kLinear, BlockMapTraversalOrder::kFractalZ,
BlockMapTraversalOrder::kFractalU,
BlockMapTraversalOrder::kFractalHilbert}) {
GetBlockByIndexSquareTest(num_blocks_base_log2, traversal_order);
}
}
}
} // namespace
} // namespace ruy
int main(int argc, char **argv) {
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}