| // Copyright 2021 The libgav1 Authors |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #include "src/dsp/warp.h" |
| |
| #include <algorithm> |
| #include <cassert> |
| #include <cmath> |
| #include <cstddef> |
| #include <cstdint> |
| #include <cstdio> |
| #include <cstdlib> |
| #include <ostream> |
| #include <string> |
| #include <type_traits> |
| |
| #include "absl/base/macros.h" |
| #include "absl/strings/match.h" |
| #include "absl/strings/str_format.h" |
| #include "absl/strings/string_view.h" |
| #include "absl/time/clock.h" |
| #include "absl/time/time.h" |
| #include "gtest/gtest.h" |
| #include "src/dsp/constants.h" |
| #include "src/dsp/dsp.h" |
| #include "src/post_filter.h" |
| #include "src/utils/common.h" |
| #include "src/utils/constants.h" |
| #include "src/utils/cpu.h" |
| #include "src/utils/memory.h" |
| #include "tests/block_utils.h" |
| #include "tests/third_party/libvpx/acm_random.h" |
| #include "tests/utils.h" |
| |
| namespace libgav1 { |
| namespace dsp { |
| namespace { |
| |
| constexpr int kSourceBorderHorizontal = 16; |
| constexpr int kSourceBorderVertical = 13; |
| |
| constexpr int kMaxSourceBlockWidth = |
| kMaxSuperBlockSizeInPixels + kSourceBorderHorizontal * 2; |
| constexpr int kMaxSourceBlockHeight = |
| kMaxSuperBlockSizeInPixels + kSourceBorderVertical * 2; |
| constexpr int kMaxDestBlockWidth = |
| kMaxSuperBlockSizeInPixels + kConvolveBorderLeftTop * 2; |
| constexpr int kMaxDestBlockHeight = |
| kMaxSuperBlockSizeInPixels + kConvolveBorderLeftTop * 2; |
| |
| constexpr uint16_t kDivisorLookup[257] = { |
| 16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768, |
| 15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142, |
| 15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564, |
| 14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028, |
| 13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530, |
| 13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066, |
| 13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633, |
| 12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228, |
| 12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848, |
| 11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491, |
| 11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155, |
| 11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838, |
| 10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538, |
| 10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255, |
| 10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986, |
| 9963, 9939, 9916, 9892, 9869, 9846, 9823, 9800, 9777, 9754, 9732, |
| 9709, 9687, 9664, 9642, 9620, 9598, 9576, 9554, 9533, 9511, 9489, |
| 9468, 9447, 9425, 9404, 9383, 9362, 9341, 9321, 9300, 9279, 9259, |
| 9239, 9218, 9198, 9178, 9158, 9138, 9118, 9098, 9079, 9059, 9039, |
| 9020, 9001, 8981, 8962, 8943, 8924, 8905, 8886, 8867, 8849, 8830, |
| 8812, 8793, 8775, 8756, 8738, 8720, 8702, 8684, 8666, 8648, 8630, |
| 8613, 8595, 8577, 8560, 8542, 8525, 8508, 8490, 8473, 8456, 8439, |
| 8422, 8405, 8389, 8372, 8355, 8339, 8322, 8306, 8289, 8273, 8257, |
| 8240, 8224, 8208, 8192}; |
| |
| template <bool is_compound> |
| const char* GetDigest8bpp(int id) { |
| static const char* const kDigest[] = { |
| "77ba358a0f5e19a8e69fa0a95712578e", "141b23d13a04e0b84d26d514de76d6b0", |
| "b0265858454b979852ffadae323f0fb7", "9cf38e3579265b656f1f2100ba15b0e9", |
| "ab51d05cc255ef8e37921182df1d89b1", "e3e96f90a4b07ca733e40f057dc01c41", |
| "4eee8c1a52a62a266db9b1c9338e124c", "901a87d8f88f6324dbc0960a6de861ac", |
| "da9cb6faf6adaeeae12b6784f39186c5", "14450ab05536cdb0d2f499716ccb559d", |
| "566b396cbf008bbb869b364fdc81860d", "681a872baf2de4e58d73ea9ab8643a72", |
| "7f17d290d513a7416761b3a01f10fd2f", |
| }; |
| static const char* const kCompoundDigest[] = { |
| "7e9339d265b7beac7bbe32fe7bb0fccb", "f747d663b427bb38a3ff36b0815a394c", |
| "858cf54d2253281a919fbdb48fe91c53", "4721dd97a212c6068bd488f400259afc", |
| "36878c7906492bc740112abdea77616f", "89deb68aa35764bbf3024b501a6bed50", |
| "8ac5b08f9b2afd38143c357646af0f82", "bf6e2a64835ea0c9d7467394253d0eb2", |
| "7b0a539acd2a27eff398dd084abad933", "61c8d81b397c1cf727ff8a9fabab90af", |
| "4d412349a25a832c1fb3fb29e3f0e2b3", "2c6dd2a9a4ede9fa00adb567ba646f30", |
| "b2a0ce68db3cadd207299f73112bed74", |
| }; |
| assert(id >= 0); |
| assert(id < sizeof(kDigest) / sizeof(kDigest[0])); |
| return is_compound ? kCompoundDigest[id] : kDigest[id]; |
| } |
| |
| #if LIBGAV1_MAX_BITDEPTH >= 10 |
| template <bool is_compound> |
| const char* GetDigest10bpp(int id) { |
| static const char* const kDigest[] = { |
| "1fef54f56a0bafccf7f8da1ac3b18b76", "8a65c72f171feafa2f393d31d6b7fe1b", |
| "808019346f2f1f45f8cf2e9fc9a49320", "c28e2f2c6c830a29bcc2452166cba521", |
| "f040674d6f54e8910d655f0d11fd8cdd", "473af9bb1c6023965c2284b716feef97", |
| "e4f6d7babd0813d5afb0f575ebfa8166", "58f96ef8a880963a213624bb0d06d47c", |
| "1ec0995fa4490628b679d03683233388", "9526fb102fde7dc1a7e160e65af6da33", |
| "f0457427d0c0e31d82ea4f612f7f86f1", "ddc82ae298cccebad493ba9de0f69fbd", |
| "5ed615091e2f62df26de7e91a985cb81", |
| }; |
| static const char* const kCompoundDigest[] = { |
| "8e6986ae143260e0b8b4887f15a141a1", "0a7f0db8316b8c3569f08834dd0c6f50", |
| "90705b2e7dbe083e8a1f70f29d6f257e", "e428a75bea77d769d21f3f7a1d2b0b38", |
| "a570b13d790c085c4ab50d71dd085d56", "e5d043c6cd6ff6dbab6e38a8877e93bd", |
| "12ea96991e46e3e9aa78ab812ffa0525", "84293a94a53f1cf814fa25e793c3fe27", |
| "b98a7502c84ac8437266f702dcc0a92e", "d8db5d52e9b0a5be0ad2d517d5bd16e9", |
| "f3be504bbb609ce4cc71c5539252638a", "fcde83b54e14e9de23460644f244b047", |
| "42eb66e752e9ef289b47053b5c73fdd6", |
| }; |
| assert(id >= 0); |
| assert(id < sizeof(kDigest) / sizeof(kDigest[0])); |
| return is_compound ? kCompoundDigest[id] : kDigest[id]; |
| } |
| #endif // LIBGAV1_MAX_BITDEPTH >= 10 |
| |
| #if LIBGAV1_MAX_BITDEPTH == 12 |
| template <bool is_compound> |
| const char* GetDigest12bpp(int id) { |
| static const char* const kDigest[] = { |
| "cd5d5e2102b8917ad70778f523d24bdf", "374a5f1b53a3fdf2eefa741eb71e6889", |
| "311636841770ec2427084891df96bee5", "c40c537917b1f0d1d84c99dfcecd8219", |
| "a1d9bb920e6c3d20c0cf84adc18e1f15", "13b5659acdb39b717526cb358c6f4026", |
| "f81ea4f6fd1f4ebed1262e3fae37b5bb", "c1452fefcd9b9562fe3a0b7f9302809c", |
| "8fed8a3159dc7b6b59a39ab2be6bee13", "b46458bc0e5cf1cee92aac4f0f608749", |
| "2e6a1039ab111add89f5b44b13565f40", "9c666691860bdc89b03f601b40126196", |
| "418a47157d992b94c302ca2e2f6ee07e", |
| }; |
| static const char* const kCompoundDigest[] = { |
| "8e6986ae143260e0b8b4887f15a141a1", "0a7f0db8316b8c3569f08834dd0c6f50", |
| "90705b2e7dbe083e8a1f70f29d6f257e", "e428a75bea77d769d21f3f7a1d2b0b38", |
| "a570b13d790c085c4ab50d71dd085d56", "e5d043c6cd6ff6dbab6e38a8877e93bd", |
| "12ea96991e46e3e9aa78ab812ffa0525", "84293a94a53f1cf814fa25e793c3fe27", |
| "b98a7502c84ac8437266f702dcc0a92e", "d8db5d52e9b0a5be0ad2d517d5bd16e9", |
| "f3be504bbb609ce4cc71c5539252638a", "fcde83b54e14e9de23460644f244b047", |
| "42eb66e752e9ef289b47053b5c73fdd6", |
| }; |
| assert(id >= 0); |
| assert(id < sizeof(kDigest) / sizeof(kDigest[0])); |
| return is_compound ? kCompoundDigest[id] : kDigest[id]; |
| } |
| #endif // LIBGAV1_MAX_BITDEPTH == 12 |
| |
| int RandomWarpedParam(int seed_offset, int bits) { |
| libvpx_test::ACMRandom rnd(seed_offset + |
| libvpx_test::ACMRandom::DeterministicSeed()); |
| // 1 in 8 chance of generating zero (arbitrary). |
| const bool zero = (rnd.Rand16() & 7) == 0; |
| if (zero) return 0; |
| // Generate uniform values in the range [-(1 << bits), 1] U [1, 1 << |
| // bits]. |
| const int mask = (1 << bits) - 1; |
| const int value = 1 + (rnd.RandRange(1u << 31) & mask); |
| const bool sign = (rnd.Rand16() & 1) != 0; |
| return sign ? value : -value; |
| } |
| |
| // This function is a copy from warp_prediction.cc. |
| template <typename T> |
| void GenerateApproximateDivisor(T value, int16_t* division_factor, |
| int16_t* division_shift) { |
| const int n = FloorLog2(std::abs(value)); |
| const T e = std::abs(value) - (static_cast<T>(1) << n); |
| const int entry = (n > kDivisorLookupBits) |
| ? RightShiftWithRounding(e, n - kDivisorLookupBits) |
| : static_cast<int>(e << (kDivisorLookupBits - n)); |
| *division_shift = n + kDivisorLookupPrecisionBits; |
| *division_factor = |
| (value < 0) ? -kDivisorLookup[entry] : kDivisorLookup[entry]; |
| } |
| |
| // This function is a copy from warp_prediction.cc. |
| int16_t GetShearParameter(int value) { |
| return static_cast<int16_t>( |
| LeftShift(RightShiftWithRoundingSigned(value, kWarpParamRoundingBits), |
| kWarpParamRoundingBits)); |
| } |
| |
| // This function is a copy from warp_prediction.cc. |
| // This function is used here to help generate valid warp parameters. |
| bool SetupShear(const int* params, int16_t* alpha, int16_t* beta, |
| int16_t* gamma, int16_t* delta) { |
| int16_t division_shift; |
| int16_t division_factor; |
| GenerateApproximateDivisor<int32_t>(params[2], &division_factor, |
| &division_shift); |
| const int alpha0 = |
| Clip3(params[2] - (1 << kWarpedModelPrecisionBits), INT16_MIN, INT16_MAX); |
| const int beta0 = Clip3(params[3], INT16_MIN, INT16_MAX); |
| const int64_t v = LeftShift(params[4], kWarpedModelPrecisionBits); |
| const int gamma0 = |
| Clip3(RightShiftWithRoundingSigned(v * division_factor, division_shift), |
| INT16_MIN, INT16_MAX); |
| const int64_t w = static_cast<int64_t>(params[3]) * params[4]; |
| const int delta0 = Clip3( |
| params[5] - |
| RightShiftWithRoundingSigned(w * division_factor, division_shift) - |
| (1 << kWarpedModelPrecisionBits), |
| INT16_MIN, INT16_MAX); |
| |
| *alpha = GetShearParameter(alpha0); |
| *beta = GetShearParameter(beta0); |
| *gamma = GetShearParameter(gamma0); |
| *delta = GetShearParameter(delta0); |
| if ((4 * std::abs(*alpha) + 7 * std::abs(*beta) >= |
| (1 << kWarpedModelPrecisionBits)) || |
| (4 * std::abs(*gamma) + 4 * std::abs(*delta) >= |
| (1 << kWarpedModelPrecisionBits))) { |
| return false; // NOLINT (easier condition to understand). |
| } |
| |
| return true; |
| } |
| |
| void GenerateWarpedModel(int* params, int16_t* alpha, int16_t* beta, |
| int16_t* gamma, int16_t* delta, int seed) { |
| do { |
| params[0] = RandomWarpedParam(seed, kWarpedModelPrecisionBits + 6); |
| params[1] = RandomWarpedParam(seed, kWarpedModelPrecisionBits + 6); |
| params[2] = RandomWarpedParam(seed, kWarpedModelPrecisionBits - 3) + |
| (1 << kWarpedModelPrecisionBits); |
| params[3] = RandomWarpedParam(seed, kWarpedModelPrecisionBits - 3); |
| params[4] = RandomWarpedParam(seed, kWarpedModelPrecisionBits - 3); |
| params[5] = RandomWarpedParam(seed, kWarpedModelPrecisionBits - 3) + |
| (1 << kWarpedModelPrecisionBits); |
| ++seed; |
| } while (params[2] == 0 || !SetupShear(params, alpha, beta, gamma, delta)); |
| } |
| |
| struct WarpTestParam { |
| WarpTestParam(int width, int height) : width(width), height(height) {} |
| int width; |
| int height; |
| }; |
| |
| template <bool is_compound, int bitdepth, typename Pixel> |
| class WarpTest : public testing::TestWithParam<WarpTestParam> { |
| public: |
| static_assert(bitdepth >= kBitdepth8 && bitdepth <= LIBGAV1_MAX_BITDEPTH, ""); |
| WarpTest() = default; |
| ~WarpTest() override = default; |
| |
| void SetUp() override { |
| test_utils::ResetDspTable(bitdepth); |
| WarpInit_C(); |
| const dsp::Dsp* const dsp = dsp::GetDspTable(bitdepth); |
| ASSERT_NE(dsp, nullptr); |
| const testing::TestInfo* const test_info = |
| testing::UnitTest::GetInstance()->current_test_info(); |
| const absl::string_view test_case = test_info->test_suite_name(); |
| if (absl::StartsWith(test_case, "C/")) { |
| } else if (absl::StartsWith(test_case, "NEON/")) { |
| WarpInit_NEON(); |
| } else if (absl::StartsWith(test_case, "SSE41/")) { |
| WarpInit_SSE4_1(); |
| } else { |
| FAIL() << "Unrecognized architecture prefix in test case name: " |
| << test_case; |
| } |
| func_ = is_compound ? dsp->warp_compound : dsp->warp; |
| } |
| |
| protected: |
| using DestType = |
| typename std::conditional<is_compound, uint16_t, Pixel>::type; |
| |
| void SetInputData(bool use_fixed_values, int value); |
| void Test(bool use_fixed_values, int value, int num_runs = 1); |
| void TestFixedValues(); |
| void TestRandomValues(); |
| void TestSpeed(); |
| |
| const WarpTestParam param_ = GetParam(); |
| |
| private: |
| int warp_params_[8]; |
| dsp::WarpFunc func_; |
| // Warp filters are 7-tap, which needs 3 pixels (kConvolveBorderLeftTop) |
| // padding. Destination buffer indices are based on subsampling values (x+y): |
| // 0: (4:4:4), 1:(4:2:2), 2: (4:2:0). |
| Pixel source_[kMaxSourceBlockHeight * kMaxSourceBlockWidth] = {}; |
| DestType dest_[3][kMaxDestBlockHeight * kMaxDestBlockWidth] = {}; |
| }; |
| |
| template <bool is_compound, int bitdepth, typename Pixel> |
| void WarpTest<is_compound, bitdepth, Pixel>::SetInputData(bool use_fixed_values, |
| int value) { |
| if (use_fixed_values) { |
| for (int y = 0; y < param_.height; ++y) { |
| const int row = kSourceBorderVertical + y; |
| Memset(source_ + row * kMaxSourceBlockWidth + kSourceBorderHorizontal, |
| value, param_.width); |
| } |
| } else { |
| const int mask = (1 << bitdepth) - 1; |
| libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed()); |
| for (int y = 0; y < param_.height; ++y) { |
| const int row = kSourceBorderVertical + y; |
| for (int x = 0; x < param_.width; ++x) { |
| const int column = kSourceBorderHorizontal + x; |
| source_[row * kMaxSourceBlockWidth + column] = rnd.Rand16() & mask; |
| } |
| } |
| } |
| PostFilter::ExtendFrame<Pixel>( |
| &source_[kSourceBorderVertical * kMaxSourceBlockWidth + |
| kSourceBorderHorizontal], |
| param_.width, param_.height, kMaxSourceBlockWidth, |
| kSourceBorderHorizontal, kSourceBorderHorizontal, kSourceBorderVertical, |
| kSourceBorderVertical); |
| } |
| |
| template <bool is_compound, int bitdepth, typename Pixel> |
| void WarpTest<is_compound, bitdepth, Pixel>::Test(bool use_fixed_values, |
| int value, |
| int num_runs /*= 1*/) { |
| if (func_ == nullptr) return; |
| SetInputData(use_fixed_values, value); |
| libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed()); |
| const int source_offset = |
| kSourceBorderVertical * kMaxSourceBlockWidth + kSourceBorderHorizontal; |
| const int dest_offset = |
| kConvolveBorderLeftTop * kMaxDestBlockWidth + kConvolveBorderLeftTop; |
| const Pixel* const src = source_ + source_offset; |
| const ptrdiff_t src_stride = kMaxSourceBlockWidth * sizeof(Pixel); |
| const ptrdiff_t dst_stride = |
| is_compound ? kMaxDestBlockWidth : kMaxDestBlockWidth * sizeof(Pixel); |
| |
| absl::Duration elapsed_time; |
| for (int subsampling_x = 0; subsampling_x <= 1; ++subsampling_x) { |
| for (int subsampling_y = 0; subsampling_y <= 1; ++subsampling_y) { |
| if (subsampling_x == 0 && subsampling_y == 1) { |
| // When both are 0: 4:4:4 |
| // When both are 1: 4:2:0 |
| // When only |subsampling_x| is 1: 4:2:2 |
| // Having only |subsampling_y| == 1 is unsupported. |
| continue; |
| } |
| int params[8]; |
| int16_t alpha; |
| int16_t beta; |
| int16_t gamma; |
| int16_t delta; |
| GenerateWarpedModel(params, &alpha, &beta, &gamma, &delta, rnd.Rand8()); |
| |
| const int dest_id = subsampling_x + subsampling_y; |
| DestType* const dst = dest_[dest_id] + dest_offset; |
| const absl::Time start = absl::Now(); |
| for (int n = 0; n < num_runs; ++n) { |
| func_(src, src_stride, param_.width, param_.height, params, |
| subsampling_x, subsampling_y, 0, 0, param_.width, param_.height, |
| alpha, beta, gamma, delta, dst, dst_stride); |
| } |
| elapsed_time += absl::Now() - start; |
| } |
| } |
| |
| if (use_fixed_values) { |
| // For fixed values, input and output are identical. |
| for (size_t i = 0; i < ABSL_ARRAYSIZE(dest_); ++i) { |
| // |is_compound| holds a few more bits of precision and an offset value. |
| Pixel compensated_dest[kMaxDestBlockWidth * kMaxDestBlockHeight]; |
| const int compound_offset = (bitdepth == 8) ? 0 : kCompoundOffset; |
| if (is_compound) { |
| for (int y = 0; y < param_.height; ++y) { |
| for (int x = 0; x < param_.width; ++x) { |
| const int compound_value = |
| dest_[i][dest_offset + y * kMaxDestBlockWidth + x]; |
| const int remove_offset = compound_value - compound_offset; |
| const int full_shift = |
| remove_offset >> |
| (kInterRoundBitsVertical - kInterRoundBitsCompoundVertical); |
| compensated_dest[y * kMaxDestBlockWidth + x] = |
| Clip3(full_shift, 0, (1 << bitdepth) - 1); |
| } |
| } |
| } |
| Pixel* pixel_dest = |
| is_compound ? compensated_dest |
| : reinterpret_cast<Pixel*>(dest_[i] + dest_offset); |
| const bool success = test_utils::CompareBlocks( |
| src, pixel_dest, param_.width, param_.height, kMaxSourceBlockWidth, |
| kMaxDestBlockWidth, false); |
| EXPECT_TRUE(success) << "subsampling_x + subsampling_y: " << i; |
| } |
| } else { |
| // (width, height): |
| // (8, 8), id = 0. (8, 16), id = 1. (16, 8), id = 2. |
| // (16, 16), id = 3. (16, 32), id = 4. (32, 16), id = 5. |
| // ... |
| // (128, 128), id = 12. |
| int id; |
| if (param_.width == param_.height) { |
| id = 3 * static_cast<int>(FloorLog2(param_.width) - 3); |
| } else if (param_.width < param_.height) { |
| id = 1 + 3 * static_cast<int>(FloorLog2(param_.width) - 3); |
| } else { |
| id = 2 + 3 * static_cast<int>(FloorLog2(param_.height) - 3); |
| } |
| |
| const char* expected_digest = nullptr; |
| switch (bitdepth) { |
| case 8: |
| expected_digest = GetDigest8bpp<is_compound>(id); |
| break; |
| #if LIBGAV1_MAX_BITDEPTH >= 10 |
| case 10: |
| expected_digest = GetDigest10bpp<is_compound>(id); |
| break; |
| #endif |
| #if LIBGAV1_MAX_BITDEPTH == 12 |
| case 12: |
| expected_digest = GetDigest12bpp<is_compound>(id); |
| break; |
| #endif |
| } |
| ASSERT_NE(expected_digest, nullptr); |
| test_utils::CheckMd5Digest( |
| "Warp", absl::StrFormat("%dx%d", param_.width, param_.height).c_str(), |
| expected_digest, dest_, sizeof(dest_), elapsed_time); |
| } |
| } |
| |
| template <bool is_compound, int bitdepth, typename Pixel> |
| void WarpTest<is_compound, bitdepth, Pixel>::TestFixedValues() { |
| Test(true, 0); |
| Test(true, 1); |
| Test(true, 128); |
| Test(true, (1 << bitdepth) - 1); |
| } |
| |
| template <bool is_compound, int bitdepth, typename Pixel> |
| void WarpTest<is_compound, bitdepth, Pixel>::TestRandomValues() { |
| Test(false, 0); |
| } |
| |
| template <bool is_compound, int bitdepth, typename Pixel> |
| void WarpTest<is_compound, bitdepth, Pixel>::TestSpeed() { |
| const int num_runs = static_cast<int>(1.0e7 / (param_.width * param_.height)); |
| Test(false, 0, num_runs); |
| } |
| |
| void ApplyFilterToSignedInput(const int min_input, const int max_input, |
| const int8_t filter[kSubPixelTaps], |
| int* min_output, int* max_output) { |
| int min = 0, max = 0; |
| for (int i = 0; i < kSubPixelTaps; ++i) { |
| const int tap = filter[i]; |
| if (tap > 0) { |
| max += max_input * tap; |
| min += min_input * tap; |
| } else { |
| min += max_input * tap; |
| max += min_input * tap; |
| } |
| } |
| *min_output = min; |
| *max_output = max; |
| } |
| |
| void ApplyFilterToUnsignedInput(const int max_input, |
| const int8_t filter[kSubPixelTaps], |
| int* min_output, int* max_output) { |
| ApplyFilterToSignedInput(0, max_input, filter, min_output, max_output); |
| } |
| |
| // Validate the maximum ranges for different parts of the Warp process. |
| template <int bitdepth> |
| void ShowRange() { |
| constexpr int horizontal_bits = (bitdepth == kBitdepth12) |
| ? kInterRoundBitsHorizontal12bpp |
| : kInterRoundBitsHorizontal; |
| constexpr int vertical_bits = (bitdepth == kBitdepth12) |
| ? kInterRoundBitsVertical12bpp |
| : kInterRoundBitsVertical; |
| constexpr int compound_vertical_bits = kInterRoundBitsCompoundVertical; |
| |
| constexpr int compound_offset = (bitdepth == 8) ? 0 : kCompoundOffset; |
| |
| constexpr int max_input = (1 << bitdepth) - 1; |
| |
| const int8_t* worst_warp_filter = kWarpedFilters8[93]; |
| |
| // First pass. |
| printf("Bitdepth: %2d Input range: [%8d, %8d]\n", bitdepth, 0, |
| max_input); |
| |
| int min = 0, max = 0; |
| ApplyFilterToUnsignedInput(max_input, worst_warp_filter, &min, &max); |
| |
| int first_pass_offset; |
| if (bitdepth == 8) { |
| // Derive an offset for 8 bit. |
| for (first_pass_offset = 1; - first_pass_offset > min; |
| first_pass_offset <<= 1) { |
| } |
| printf(" 8bpp intermediate offset: %d.\n", first_pass_offset); |
| min += first_pass_offset; |
| max += first_pass_offset; |
| assert(min > 0); |
| assert(max < UINT16_MAX); |
| } else { |
| // 10bpp and 12bpp require int32_t for the intermediate values. Adding an |
| // offset is not required. |
| assert(min > INT32_MIN); |
| assert(max > INT16_MAX && max < INT32_MAX); |
| } |
| |
| printf(" intermediate range: [%8d, %8d]\n", min, max); |
| |
| const int first_pass_min = RightShiftWithRounding(min, horizontal_bits); |
| const int first_pass_max = RightShiftWithRounding(max, horizontal_bits); |
| |
| printf(" first pass output range: [%8d, %8d]\n", first_pass_min, |
| first_pass_max); |
| |
| // Second pass. |
| if (bitdepth == 8) { |
| ApplyFilterToUnsignedInput(first_pass_max, worst_warp_filter, &min, &max); |
| } else { |
| ApplyFilterToSignedInput(first_pass_min, first_pass_max, worst_warp_filter, |
| &min, &max); |
| } |
| |
| if (bitdepth == 8) { |
| // Remove the offset that was applied in the first pass since we must use |
| // int32_t for this phase anyway. 128 is the sum of the filter taps. |
| const int offset_removal = (first_pass_offset >> horizontal_bits) * 128; |
| printf(" 8bpp intermediate offset removal: %d.\n", offset_removal); |
| max -= offset_removal; |
| min -= offset_removal; |
| assert(min < INT16_MIN && min > INT32_MIN); |
| assert(max > INT16_MAX && max < INT32_MAX); |
| } else { |
| // 10bpp and 12bpp require int32_t for the intermediate values. Adding an |
| // offset is not required. |
| assert(min > INT32_MIN); |
| assert(max > INT16_MAX && max < INT32_MAX); |
| } |
| |
| printf(" intermediate range: [%8d, %8d]\n", min, max); |
| |
| // Second pass non-compound output is clipped to Pixel values. |
| const int second_pass_min = |
| Clip3(RightShiftWithRounding(min, vertical_bits), 0, max_input); |
| const int second_pass_max = |
| Clip3(RightShiftWithRounding(max, vertical_bits), 0, max_input); |
| printf(" second pass output range: [%8d, %8d]\n", second_pass_min, |
| second_pass_max); |
| |
| // Output is Pixel so matches Pixel values. |
| assert(second_pass_min == 0); |
| assert(second_pass_max == max_input); |
| |
| const int compound_second_pass_min = |
| RightShiftWithRounding(min, compound_vertical_bits) + compound_offset; |
| const int compound_second_pass_max = |
| RightShiftWithRounding(max, compound_vertical_bits) + compound_offset; |
| |
| printf(" compound second pass output range: [%8d, %8d]\n", |
| compound_second_pass_min, compound_second_pass_max); |
| |
| if (bitdepth == 8) { |
| // 8bpp output is int16_t without an offset. |
| assert(compound_second_pass_min > INT16_MIN); |
| assert(compound_second_pass_max < INT16_MAX); |
| } else { |
| // 10bpp and 12bpp use the offset to fit inside uint16_t. |
| assert(compound_second_pass_min > 0); |
| assert(compound_second_pass_max < UINT16_MAX); |
| } |
| |
| printf("\n"); |
| } |
| |
| TEST(WarpTest, ShowRange) { |
| ShowRange<kBitdepth8>(); |
| ShowRange<kBitdepth10>(); |
| ShowRange<kBitdepth12>(); |
| } |
| |
| using WarpTest8bpp = WarpTest</*is_compound=*/false, 8, uint8_t>; |
| // TODO(jzern): Coverage could be added for kInterRoundBitsCompoundVertical via |
| // WarpCompoundTest. |
| // using WarpCompoundTest8bpp = WarpTest</*is_compound=*/true, 8, uint8_t>; |
| |
| // Verifies the sum of the warped filter coefficients is 128 for every filter. |
| // |
| // Verifies the properties used in the calculation of ranges of variables in |
| // the block warp process: |
| // * The maximum sum of the positive warped filter coefficients is 175. |
| // * The minimum (i.e., most negative) sum of the negative warped filter |
| // coefficients is -47. |
| // |
| // NOTE: This test is independent of the bitdepth and the implementation of the |
| // block warp function, so it just needs to be a test in the WarpTest8bpp class |
| // and does not need to be defined with TEST_P. |
| TEST(WarpTest8bpp, WarpedFilterCoefficientSums) { |
| int max_positive_sum = 0; |
| int min_negative_sum = 0; |
| for (const auto& filter : kWarpedFilters) { |
| int sum = 0; |
| int positive_sum = 0; |
| int negative_sum = 0; |
| for (const auto coefficient : filter) { |
| sum += coefficient; |
| if (coefficient > 0) { |
| positive_sum += coefficient; |
| } else { |
| negative_sum += coefficient; |
| } |
| } |
| EXPECT_EQ(sum, 128); |
| max_positive_sum = std::max(positive_sum, max_positive_sum); |
| min_negative_sum = std::min(negative_sum, min_negative_sum); |
| } |
| EXPECT_EQ(max_positive_sum, 175); |
| EXPECT_EQ(min_negative_sum, -47); |
| } |
| |
| TEST_P(WarpTest8bpp, FixedValues) { TestFixedValues(); } |
| |
| TEST_P(WarpTest8bpp, RandomValues) { TestRandomValues(); } |
| |
| TEST_P(WarpTest8bpp, DISABLED_Speed) { TestSpeed(); } |
| const WarpTestParam warp_test_param[] = { |
| WarpTestParam(8, 8), WarpTestParam(8, 16), WarpTestParam(16, 8), |
| WarpTestParam(16, 16), WarpTestParam(16, 32), WarpTestParam(32, 16), |
| WarpTestParam(32, 32), WarpTestParam(32, 64), WarpTestParam(64, 32), |
| WarpTestParam(64, 64), WarpTestParam(64, 128), WarpTestParam(128, 64), |
| WarpTestParam(128, 128), |
| }; |
| |
| INSTANTIATE_TEST_SUITE_P(C, WarpTest8bpp, testing::ValuesIn(warp_test_param)); |
| |
| #if LIBGAV1_ENABLE_NEON |
| INSTANTIATE_TEST_SUITE_P(NEON, WarpTest8bpp, |
| testing::ValuesIn(warp_test_param)); |
| #endif |
| |
| #if LIBGAV1_ENABLE_SSE4_1 |
| INSTANTIATE_TEST_SUITE_P(SSE41, WarpTest8bpp, |
| testing::ValuesIn(warp_test_param)); |
| #endif |
| |
| #if LIBGAV1_MAX_BITDEPTH >= 10 |
| using WarpTest10bpp = WarpTest</*is_compound=*/false, 10, uint16_t>; |
| // TODO(jzern): Coverage could be added for kInterRoundBitsCompoundVertical via |
| // WarpCompoundTest. |
| // using WarpCompoundTest10bpp = WarpTest</*is_compound=*/true, 10, uint16_t>; |
| |
| TEST_P(WarpTest10bpp, FixedValues) { TestFixedValues(); } |
| |
| TEST_P(WarpTest10bpp, RandomValues) { TestRandomValues(); } |
| |
| TEST_P(WarpTest10bpp, DISABLED_Speed) { TestSpeed(); } |
| |
| INSTANTIATE_TEST_SUITE_P(C, WarpTest10bpp, testing::ValuesIn(warp_test_param)); |
| |
| #if LIBGAV1_ENABLE_NEON |
| INSTANTIATE_TEST_SUITE_P(NEON, WarpTest10bpp, |
| testing::ValuesIn(warp_test_param)); |
| #endif |
| #endif // LIBGAV1_MAX_BITDEPTH >= 10 |
| |
| #if LIBGAV1_MAX_BITDEPTH == 12 |
| using WarpTest12bpp = WarpTest</*is_compound=*/false, 12, uint16_t>; |
| // TODO(jzern): Coverage could be added for kInterRoundBitsCompoundVertical via |
| // WarpCompoundTest. |
| // using WarpCompoundTest12bpp = WarpTest</*is_compound=*/true, 12, uint16_t>; |
| |
| TEST_P(WarpTest12bpp, FixedValues) { TestFixedValues(); } |
| |
| TEST_P(WarpTest12bpp, RandomValues) { TestRandomValues(); } |
| |
| TEST_P(WarpTest12bpp, DISABLED_Speed) { TestSpeed(); } |
| |
| INSTANTIATE_TEST_SUITE_P(C, WarpTest12bpp, testing::ValuesIn(warp_test_param)); |
| #endif // LIBGAV1_MAX_BITDEPTH == 12 |
| |
| std::ostream& operator<<(std::ostream& os, const WarpTestParam& warp_param) { |
| return os << "BlockSize" << warp_param.width << "x" << warp_param.height; |
| } |
| |
| } // namespace |
| } // namespace dsp |
| } // namespace libgav1 |