| // Copyright 2019 The libgav1 Authors |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #include "src/dsp/inverse_transform.h" |
| |
| #include <algorithm> |
| #include <cassert> |
| #include <cstdint> |
| #include <cstring> |
| |
| #include "src/dsp/dsp.h" |
| #include "src/utils/array_2d.h" |
| #include "src/utils/common.h" |
| #include "src/utils/compiler_attributes.h" |
| #include "src/utils/logging.h" |
| |
| namespace libgav1 { |
| namespace dsp { |
| namespace { |
| |
| // Include the constants and utility functions inside the anonymous namespace. |
| #include "src/dsp/inverse_transform.inc" |
| |
// Shift used for the rounding right shift that follows every lossy column
// transform. The identity column transforms fold this shift into their own
// computation.
constexpr uint8_t kTransformColumnShift = 4;
| |
// Debug helper that verifies that |value| is representable by a signed integer
// using |range| bits of precision. The check is compiled in only when
// LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK is defined to a non-zero value;
// otherwise the function does nothing. |value| is always returned unchanged so
// that a call can wrap an expression.
int32_t RangeCheckValue(int32_t value, int8_t range) {
#if defined(LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK) && \
    LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK
  assert(range >= 1 && range <= 32);
  // The bounds are computed in 64 bits. When |range| is 32, 1 << 31 is not
  // representable in a 32-bit int and the following negation / subtraction
  // would overflow.
  const int64_t min = -(int64_t{1} << (range - 1));
  const int64_t max = (int64_t{1} << (range - 1)) - 1;
  if (min > value || value > max) {
    LIBGAV1_DLOG(ERROR, "coeff out of bit range, value: %d bit range %d\n",
                 value, range);
    assert(min <= value && value <= max);
  }
#endif  // LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK
  static_cast<void>(range);
  return value;
}
| |
| template <typename Residual> |
| LIBGAV1_ALWAYS_INLINE void ButterflyRotation_C(Residual* const dst, int a, |
| int b, int angle, bool flip, |
| int8_t range) { |
| // Note that we multiply in 32 bits and then add/subtract the products in 64 |
| // bits. The 32-bit multiplications do not overflow. Please see the comment |
| // and assert() in Cos128(). |
| const int64_t x = static_cast<int64_t>(dst[a] * Cos128(angle)) - |
| static_cast<int64_t>(dst[b] * Sin128(angle)); |
| const int64_t y = static_cast<int64_t>(dst[a] * Sin128(angle)) + |
| static_cast<int64_t>(dst[b] * Cos128(angle)); |
| // Section 7.13.2.1: It is a requirement of bitstream conformance that the |
| // values saved into the array T by this function are representable by a |
| // signed integer using |range| bits of precision. |
| dst[a] = RangeCheckValue(RightShiftWithRounding(flip ? y : x, 12), range); |
| dst[b] = RangeCheckValue(RightShiftWithRounding(flip ? x : y, 12), range); |
| } |
| |
| template <typename Residual> |
| void HadamardRotation_C(Residual* const dst, int a, int b, bool flip, |
| int8_t range) { |
| if (flip) std::swap(a, b); |
| --range; |
| // For Adst and Dct, the maximum possible value for range is 20. So min and |
| // max should always fit into int32_t. |
| const int32_t min = -(1 << range); |
| const int32_t max = (1 << range) - 1; |
| const int32_t x = dst[a] + dst[b]; |
| const int32_t y = dst[a] - dst[b]; |
| dst[a] = Clip3(x, min, max); |
| dst[b] = Clip3(y, min, max); |
| } |
| |
| //------------------------------------------------------------------------------ |
| // Discrete Cosine Transforms (DCT). |
| |
// Bit-reversal table used by Dct_C(). Row i serves a transform of size
// 2^(i + 2): the value for index (i, j) is computed by reversing the bits of j
// and interpreting the result as an integer with bit-length i + 2.
// For example, index (2, 3) is computed as follows:
//   * bitreverse(3) = bitreverse(..000011) = 110000...
//   * interpreting that as an integer with bit-length 2 + 2 = 4 gives
//     1100 = 12.
// Every row holds 64 entries, so the rows for sizes smaller than 64 repeat
// their 2^(i + 2)-entry pattern.
const uint8_t kBitReverseLookup[kNum1DTransformSizes][64] = {
    {0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2,
     1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3,
     0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3},
    {0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6, 1, 5,
     3, 7, 0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6,
     1, 5, 3, 7, 0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6, 1, 5, 3, 7},
    {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15,
     0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15,
     0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15,
     0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15},
    {0, 16, 8, 24, 4, 20, 12, 28, 2, 18, 10, 26, 6, 22, 14, 30,
     1, 17, 9, 25, 5, 21, 13, 29, 3, 19, 11, 27, 7, 23, 15, 31,
     0, 16, 8, 24, 4, 20, 12, 28, 2, 18, 10, 26, 6, 22, 14, 30,
     1, 17, 9, 25, 5, 21, 13, 29, 3, 19, 11, 27, 7, 23, 15, 31},
    {0, 32, 16, 48, 8, 40, 24, 56, 4, 36, 20, 52, 12, 44, 28, 60,
     2, 34, 18, 50, 10, 42, 26, 58, 6, 38, 22, 54, 14, 46, 30, 62,
     1, 33, 17, 49, 9, 41, 25, 57, 5, 37, 21, 53, 13, 45, 29, 61,
     3, 35, 19, 51, 11, 43, 27, 59, 7, 39, 23, 55, 15, 47, 31, 63}};
| |
// Inverse DCT of 2^|size_log2| points (4 to 64 points). Reads the coefficients
// from |source| and writes the result to |dest|; the two may point to the same
// buffer. |range| is forwarded unchanged to every ButterflyRotation_C() and
// HadamardRotation_C() call.
//
// The stages run in a fixed order. Each stage is guarded by the transform
// sizes it applies to, so a smaller transform simply skips the stages it does
// not need.
template <typename Residual, int size_log2>
void Dct_C(void* dest, const void* source, int8_t range) {
  static_assert(size_log2 >= 2 && size_log2 <= 6, "");
  auto* const dst = static_cast<Residual*>(dest);
  const auto* const src = static_cast<const Residual*>(source);
  // stage 1 (all sizes): reorder the input using the bit-reversal table.
  const int size = 1 << size_log2;
  // The copy is necessary because |dst| and |src| could be pointing to the same
  // buffer.
  Residual temp[size];
  memcpy(temp, src, sizeof(temp));
  for (int i = 0; i < size; ++i) {
    dst[i] = temp[kBitReverseLookup[size_log2 - 2][i]];
  }
  // stages 2-31 are dependent on the value of size_log2.
  // stage 2 (64-point only).
  if (size_log2 == 6) {
    for (int i = 0; i < 16; ++i) {
      ButterflyRotation_C(dst, i + 32, 63 - i,
                          63 - MultiplyBy4(kBitReverseLookup[2][i]), false,
                          range);
    }
  }
  // stage 3 (32-point and larger).
  if (size_log2 >= 5) {
    for (int i = 0; i < 8; ++i) {
      ButterflyRotation_C(dst, i + 16, 31 - i,
                          6 + MultiplyBy8(kBitReverseLookup[1][7 - i]), false,
                          range);
    }
  }
  // stage 4 (64-point only).
  if (size_log2 == 6) {
    for (int i = 0; i < 16; ++i) {
      HadamardRotation_C(dst, MultiplyBy2(i) + 32, MultiplyBy2(i) + 33,
                         static_cast<bool>(i & 1), range);
    }
  }
  // stage 5 (16-point and larger).
  if (size_log2 >= 4) {
    for (int i = 0; i < 4; ++i) {
      ButterflyRotation_C(dst, i + 8, 15 - i,
                          12 + MultiplyBy16(kBitReverseLookup[0][3 - i]), false,
                          range);
    }
  }
  // stage 6 (32-point and larger).
  if (size_log2 >= 5) {
    for (int i = 0; i < 8; ++i) {
      HadamardRotation_C(dst, MultiplyBy2(i) + 16, MultiplyBy2(i) + 17,
                         static_cast<bool>(i & 1), range);
    }
  }
  // stage 7 (64-point only).
  if (size_log2 == 6) {
    for (int i = 0; i < 4; ++i) {
      for (int j = 0; j < 2; ++j) {
        ButterflyRotation_C(
            dst, 62 - MultiplyBy4(i) - j, MultiplyBy4(i) + j + 33,
            60 - MultiplyBy16(kBitReverseLookup[0][i]) + MultiplyBy64(j), true,
            range);
      }
    }
  }
  // stage 8 (8-point and larger).
  if (size_log2 >= 3) {
    for (int i = 0; i < 2; ++i) {
      ButterflyRotation_C(dst, i + 4, 7 - i, 56 - 32 * i, false, range);
    }
  }
  // stage 9 (16-point and larger).
  if (size_log2 >= 4) {
    for (int i = 0; i < 4; ++i) {
      HadamardRotation_C(dst, MultiplyBy2(i) + 8, MultiplyBy2(i) + 9,
                         static_cast<bool>(i & 1), range);
    }
  }
  // stage 10 (32-point and larger).
  if (size_log2 >= 5) {
    for (int i = 0; i < 2; ++i) {
      for (int j = 0; j < 2; ++j) {
        ButterflyRotation_C(
            dst, 30 - MultiplyBy4(i) - j, MultiplyBy4(i) + j + 17,
            24 + MultiplyBy64(j) + MultiplyBy32(1 - i), true, range);
      }
    }
  }
  // stage 11 (64-point only).
  if (size_log2 == 6) {
    for (int i = 0; i < 8; ++i) {
      for (int j = 0; j < 2; ++j) {
        HadamardRotation_C(dst, MultiplyBy4(i) + j + 32,
                           MultiplyBy4(i) - j + 35, static_cast<bool>(i & 1),
                           range);
      }
    }
  }
  // stage 12 (all sizes).
  for (int i = 0; i < 2; ++i) {
    ButterflyRotation_C(dst, MultiplyBy2(i), MultiplyBy2(i) + 1, 32 + 16 * i,
                        i == 0, range);
  }
  // stage 13 (8-point and larger).
  if (size_log2 >= 3) {
    for (int i = 0; i < 2; ++i) {
      HadamardRotation_C(dst, MultiplyBy2(i) + 4, MultiplyBy2(i) + 5,
                         static_cast<bool>(i), range);
    }
  }
  // stage 14 (16-point and larger).
  if (size_log2 >= 4) {
    for (int i = 0; i < 2; ++i) {
      ButterflyRotation_C(dst, 14 - i, i + 9, 48 + 64 * i, true, range);
    }
  }
  // stage 15 (32-point and larger).
  if (size_log2 >= 5) {
    for (int i = 0; i < 4; ++i) {
      for (int j = 0; j < 2; ++j) {
        HadamardRotation_C(dst, MultiplyBy4(i) + j + 16,
                           MultiplyBy4(i) - j + 19, static_cast<bool>(i & 1),
                           range);
      }
    }
  }
  // stage 16 (64-point only).
  if (size_log2 == 6) {
    for (int i = 0; i < 2; ++i) {
      for (int j = 0; j < 4; ++j) {
        ButterflyRotation_C(
            dst, 61 - MultiplyBy8(i) - j, MultiplyBy8(i) + j + 34,
            56 - MultiplyBy32(i) + MultiplyBy64(DivideBy2(j)), true, range);
      }
    }
  }
  // stage 17 (all sizes).
  for (int i = 0; i < 2; ++i) {
    HadamardRotation_C(dst, i, 3 - i, false, range);
  }
  // stage 18 (8-point and larger).
  if (size_log2 >= 3) {
    ButterflyRotation_C(dst, 6, 5, 32, true, range);
  }
  // stage 19 (16-point and larger).
  if (size_log2 >= 4) {
    for (int i = 0; i < 2; ++i) {
      for (int j = 0; j < 2; ++j) {
        HadamardRotation_C(dst, MultiplyBy4(i) + j + 8, MultiplyBy4(i) - j + 11,
                           static_cast<bool>(i), range);
      }
    }
  }
  // stage 20 (32-point and larger).
  if (size_log2 >= 5) {
    for (int i = 0; i < 4; ++i) {
      ButterflyRotation_C(dst, 29 - i, i + 18, 48 + 64 * DivideBy2(i), true,
                          range);
    }
  }
  // stage 21 (64-point only).
  if (size_log2 == 6) {
    for (int i = 0; i < 4; ++i) {
      for (int j = 0; j < 4; ++j) {
        HadamardRotation_C(dst, MultiplyBy8(i) + j + 32,
                           MultiplyBy8(i) - j + 39, static_cast<bool>(i & 1),
                           range);
      }
    }
  }
  // stage 22 (8-point and larger).
  if (size_log2 >= 3) {
    for (int i = 0; i < 4; ++i) {
      HadamardRotation_C(dst, i, 7 - i, false, range);
    }
  }
  // stage 23 (16-point and larger).
  if (size_log2 >= 4) {
    for (int i = 0; i < 2; ++i) {
      ButterflyRotation_C(dst, 13 - i, i + 10, 32, true, range);
    }
  }
  // stage 24 (32-point and larger).
  if (size_log2 >= 5) {
    for (int i = 0; i < 2; ++i) {
      for (int j = 0; j < 4; ++j) {
        HadamardRotation_C(dst, MultiplyBy8(i) + j + 16,
                           MultiplyBy8(i) - j + 23, i == 1, range);
      }
    }
  }
  // stage 25 (64-point only).
  if (size_log2 == 6) {
    for (int i = 0; i < 8; ++i) {
      ButterflyRotation_C(dst, 59 - i, i + 36, (i < 4) ? 48 : 112, true, range);
    }
  }
  // stage 26 (16-point and larger).
  if (size_log2 >= 4) {
    for (int i = 0; i < 8; ++i) {
      HadamardRotation_C(dst, i, 15 - i, false, range);
    }
  }
  // stage 27 (32-point and larger).
  if (size_log2 >= 5) {
    for (int i = 0; i < 4; ++i) {
      ButterflyRotation_C(dst, 27 - i, i + 20, 32, true, range);
    }
  }
  // stage 28 (64-point only).
  if (size_log2 == 6) {
    for (int i = 0; i < 8; ++i) {
      HadamardRotation_C(dst, i + 32, 47 - i, false, range);
      HadamardRotation_C(dst, i + 48, 63 - i, true, range);
    }
  }
  // stage 29 (32-point and larger).
  if (size_log2 >= 5) {
    for (int i = 0; i < 16; ++i) {
      HadamardRotation_C(dst, i, 31 - i, false, range);
    }
  }
  // stage 30 (64-point only).
  if (size_log2 == 6) {
    for (int i = 0; i < 8; ++i) {
      ButterflyRotation_C(dst, 55 - i, i + 40, 32, true, range);
    }
  }
  // stage 31 (64-point only).
  if (size_log2 == 6) {
    for (int i = 0; i < 32; ++i) {
      HadamardRotation_C(dst, i, 63 - i, false, range);
    }
  }
}
| |
| //------------------------------------------------------------------------------ |
| // Asymmetric Discrete Sine Transforms (ADST). |
| |
| /* |
| * Row transform max range in bits for bitdepths 8/10/12: 28/30/32. |
| * Column transform max range in bits for bitdepths 8/10/12: 28/28/30. |
| */ |
| template <typename Residual> |
| void Adst4_C(void* dest, const void* source, int8_t range) { |
| auto* const dst = static_cast<Residual*>(dest); |
| const auto* const src = static_cast<const Residual*>(source); |
| if ((src[0] | src[1] | src[2] | src[3]) == 0) { |
| memset(dst, 0, 4 * sizeof(dst[0])); |
| return; |
| } |
| |
| // stage 1. |
| // Section 7.13.2.6: It is a requirement of bitstream conformance that all |
| // values stored in the s and x arrays by this process are representable by |
| // a signed integer using range + 12 bits of precision. |
| int32_t s[7]; |
| s[0] = RangeCheckValue(kAdst4Multiplier[0] * src[0], range + 12); |
| s[1] = RangeCheckValue(kAdst4Multiplier[1] * src[0], range + 12); |
| s[2] = RangeCheckValue(kAdst4Multiplier[2] * src[1], range + 12); |
| s[3] = RangeCheckValue(kAdst4Multiplier[3] * src[2], range + 12); |
| s[4] = RangeCheckValue(kAdst4Multiplier[0] * src[2], range + 12); |
| s[5] = RangeCheckValue(kAdst4Multiplier[1] * src[3], range + 12); |
| s[6] = RangeCheckValue(kAdst4Multiplier[3] * src[3], range + 12); |
| // stage 2. |
| // Section 7.13.2.6: It is a requirement of bitstream conformance that |
| // values stored in the variable a7 by this process are representable by a |
| // signed integer using range + 1 bits of precision. |
| const int32_t a7 = RangeCheckValue(src[0] - src[2], range + 1); |
| // Section 7.13.2.6: It is a requirement of bitstream conformance that |
| // values stored in the variable b7 by this process are representable by a |
| // signed integer using |range| bits of precision. |
| const int32_t b7 = RangeCheckValue(a7 + src[3], range); |
| // stage 3. |
| s[0] = RangeCheckValue(s[0] + s[3], range + 12); |
| s[1] = RangeCheckValue(s[1] - s[4], range + 12); |
| s[3] = s[2]; |
| s[2] = RangeCheckValue(kAdst4Multiplier[2] * b7, range + 12); |
| // stage 4. |
| s[0] = RangeCheckValue(s[0] + s[5], range + 12); |
| s[1] = RangeCheckValue(s[1] - s[6], range + 12); |
| // stages 5 and 6. |
| const int32_t x0 = RangeCheckValue(s[0] + s[3], range + 12); |
| const int32_t x1 = RangeCheckValue(s[1] + s[3], range + 12); |
| int32_t x3 = RangeCheckValue(s[0] + s[1], range + 12); |
| x3 = RangeCheckValue(x3 - s[3], range + 12); |
| int32_t dst_0 = RightShiftWithRounding(x0, 12); |
| int32_t dst_1 = RightShiftWithRounding(x1, 12); |
| int32_t dst_2 = RightShiftWithRounding(s[2], 12); |
| int32_t dst_3 = RightShiftWithRounding(x3, 12); |
| if (sizeof(Residual) == 2) { |
| // If the first argument to RightShiftWithRounding(..., 12) is only |
| // slightly smaller than 2^27 - 1 (e.g., 0x7fffe4e), adding 2^11 to it |
| // in RightShiftWithRounding(..., 12) will cause the function to return |
| // 0x8000, which cannot be represented as an int16_t. Change it to 0x7fff. |
| dst_0 -= (dst_0 == 0x8000); |
| dst_1 -= (dst_1 == 0x8000); |
| dst_3 -= (dst_3 == 0x8000); |
| } |
| dst[0] = dst_0; |
| dst[1] = dst_1; |
| dst[2] = dst_2; |
| dst[3] = dst_3; |
| } |
| |
// Copies the |n| values of |src| into |dst| in the input order of the 8- and
// 16-point ADST: even output positions take the input mirrored around the end
// of the array, odd output positions take the preceding input.
template <typename Residual>
void AdstInputPermutation(int32_t* const dst, const Residual* const src,
                          int n) {
  assert(n == 8 || n == 16);
  for (int out = 0; out < n; ++out) {
    const bool is_odd = (out & 1) != 0;
    const int in = is_odd ? out - 1 : n - out - 1;
    dst[out] = src[in];
  }
}
| |
// Source index of every output of the 16-point ADST. The indices of the
// 8-point ADST are the first eight entries shifted right by one.
constexpr int8_t kAdstOutputPermutationLookup[16] = {
    0, 8, 12, 4, 6, 14, 10, 2, 3, 11, 15, 7, 5, 13, 9, 1};

// Writes the |n| values of |src| to |dst| in the output order of the 8- and
// 16-point ADST, negating the values stored at odd output positions.
template <typename Residual>
void AdstOutputPermutation(Residual* const dst, const int32_t* const src,
                           int n) {
  assert(n == 8 || n == 16);
  const int halve = (n == 8) ? 1 : 0;
  for (int out = 0; out < n; ++out) {
    const int from = kAdstOutputPermutationLookup[out] >> halve;
    const bool negate = (out & 1) != 0;
    int32_t value = negate ? -src[from] : src[from];
    // If |out| is odd and src[from] is -32768, |value| will be 32768, which
    // cannot be represented as an int16_t.
    if (sizeof(Residual) == 2 && value == 0x8000) {
      value = 0x7fff;
    }
    dst[out] = value;
  }
}
| |
| template <typename Residual> |
| void Adst8_C(void* dest, const void* source, int8_t range) { |
| auto* const dst = static_cast<Residual*>(dest); |
| const auto* const src = static_cast<const Residual*>(source); |
| // stage 1. |
| int32_t temp[8]; |
| AdstInputPermutation(temp, src, 8); |
| // stage 2. |
| for (int i = 0; i < 4; ++i) { |
| ButterflyRotation_C(temp, MultiplyBy2(i), MultiplyBy2(i) + 1, 60 - 16 * i, |
| true, range); |
| } |
| // stage 3. |
| for (int i = 0; i < 4; ++i) { |
| HadamardRotation_C(temp, i, i + 4, false, range); |
| } |
| // stage 4. |
| for (int i = 0; i < 2; ++i) { |
| ButterflyRotation_C(temp, i * 3 + 4, i + 5, 48 - 32 * i, true, range); |
| } |
| // stage 5. |
| for (int i = 0; i < 2; ++i) { |
| for (int j = 0; j < 2; ++j) { |
| HadamardRotation_C(temp, i + MultiplyBy4(j), i + MultiplyBy4(j) + 2, |
| false, range); |
| } |
| } |
| // stage 6. |
| for (int i = 0; i < 2; ++i) { |
| ButterflyRotation_C(temp, MultiplyBy4(i) + 2, MultiplyBy4(i) + 3, 32, true, |
| range); |
| } |
| // stage 7. |
| AdstOutputPermutation(dst, temp, 8); |
| } |
| |
| template <typename Residual> |
| void Adst16_C(void* dest, const void* source, int8_t range) { |
| auto* const dst = static_cast<Residual*>(dest); |
| const auto* const src = static_cast<const Residual*>(source); |
| // stage 1. |
| int32_t temp[16]; |
| AdstInputPermutation(temp, src, 16); |
| // stage 2. |
| for (int i = 0; i < 8; ++i) { |
| ButterflyRotation_C(temp, MultiplyBy2(i), MultiplyBy2(i) + 1, 62 - 8 * i, |
| true, range); |
| } |
| // stage 3. |
| for (int i = 0; i < 8; ++i) { |
| HadamardRotation_C(temp, i, i + 8, false, range); |
| } |
| // stage 4. |
| for (int i = 0; i < 2; ++i) { |
| ButterflyRotation_C(temp, MultiplyBy2(i) + 8, MultiplyBy2(i) + 9, |
| 56 - 32 * i, true, range); |
| ButterflyRotation_C(temp, MultiplyBy2(i) + 13, MultiplyBy2(i) + 12, |
| 8 + 32 * i, true, range); |
| } |
| // stage 5. |
| for (int i = 0; i < 4; ++i) { |
| for (int j = 0; j < 2; ++j) { |
| HadamardRotation_C(temp, i + MultiplyBy8(j), i + MultiplyBy8(j) + 4, |
| false, range); |
| } |
| } |
| // stage 6. |
| for (int i = 0; i < 2; ++i) { |
| for (int j = 0; j < 2; ++j) { |
| ButterflyRotation_C(temp, i * 3 + MultiplyBy8(j) + 4, |
| i + MultiplyBy8(j) + 5, 48 - 32 * i, true, range); |
| } |
| } |
| // stage 7. |
| for (int i = 0; i < 2; ++i) { |
| for (int j = 0; j < 4; ++j) { |
| HadamardRotation_C(temp, i + MultiplyBy4(j), i + MultiplyBy4(j) + 2, |
| false, range); |
| } |
| } |
| // stage 8. |
| for (int i = 0; i < 4; ++i) { |
| ButterflyRotation_C(temp, MultiplyBy4(i) + 2, MultiplyBy4(i) + 3, 32, true, |
| range); |
| } |
| // stage 9. |
| AdstOutputPermutation(dst, temp, 16); |
| } |
| |
| //------------------------------------------------------------------------------ |
| // Identity Transforms. |
| // |
| // In the spec, the inverse identity transform is followed by a Round2() call: |
| // The row transforms with i = 0..(h-1) are applied as follows: |
| // ... |
| // * Otherwise, invoke the inverse identity transform process specified in |
| // section 7.13.2.15 with the input variable n equal to log2W. |
| // * Set Residual[ i ][ j ] equal to Round2( T[ j ], rowShift ) |
| // for j = 0..(w-1). |
| // ... |
| // The column transforms with j = 0..(w-1) are applied as follows: |
| // ... |
| // * Otherwise, invoke the inverse identity transform process specified in |
| // section 7.13.2.15 with the input variable n equal to log2H. |
| // * Residual[ i ][ j ] is set equal to Round2( T[ i ], colShift ) |
| // for i = 0..(h-1). |
| // |
| // Therefore, we define the identity transform functions to perform both the |
| // inverse identity transform and the Round2() call. This has two advantages: |
| // 1. The outputs of the inverse identity transform do not need to be stored |
| // in the Residual array. They can be stored in int32_t local variables, |
| // which have a larger range if Residual is an int16_t array. |
| // 2. The inverse identity transform and the Round2() call can be jointly |
| // optimized. |
| // |
| // The identity transform functions have the following prototype: |
| // void Identity_C(void* dest, const void* source, int8_t shift); |
| // |
| // The |shift| parameter is the amount of shift for the Round2() call. For row |
| // transforms, |shift| is 0, 1, or 2. For column transforms, |shift| is always |
| // 4. Therefore, an identity transform function can detect whether it is being |
| // invoked as a row transform or a column transform by checking whether |shift| |
| // is equal to 4. |
| // |
| // Input Range |
| // |
| // The inputs of row transforms, stored in the 2D array Dequant, are |
| // representable by a signed integer using 8 + BitDepth bits of precision: |
| // f. Dequant[ i ][ j ] is set equal to |
| // Clip3( - ( 1 << ( 7 + BitDepth ) ), ( 1 << ( 7 + BitDepth ) ) - 1, dq2 ). |
| // |
| // The inputs of column transforms are representable by a signed integer using |
| // Max( BitDepth + 6, 16 ) bits of precision: |
| // Set the variable colClampRange equal to Max( BitDepth + 6, 16 ). |
| // ... |
| // Between the row and column transforms, Residual[ i ][ j ] is set equal to |
| // Clip3( - ( 1 << ( colClampRange - 1 ) ), |
| // ( 1 << (colClampRange - 1 ) ) - 1, |
| // Residual[ i ][ j ] ) |
| // for i = 0..(h-1), for j = 0..(w-1). |
| // |
| // Output Range |
| // |
| // The outputs of row transforms are representable by a signed integer using |
| // 8 + BitDepth + 1 = 9 + BitDepth bits of precision, because the net effect |
| // of the multiplicative factor of inverse identity transforms minus the |
| // smallest row shift is an increase of at most one bit. |
| // |
| // Transform | Multiplicative factor | Smallest row | Net increase |
| // width | (in bits) | shift | in bits |
| // --------------------------------------------------------------- |
| // 4 | sqrt(2) (0.5 bits) | 0 | +0.5 |
| // 8 | 2 (1 bit) | 0 | +1 |
| // 16 | 2*sqrt(2) (1.5 bits) | 1 | +0.5 |
| // 32 | 4 (2 bits) | 1 | +1 |
| // |
| // If BitDepth is 8 and Residual is an int16_t array, to avoid truncation we |
| // clip the outputs (which have 17 bits of precision) to the range of int16_t |
| // before storing them in the Residual array. This clipping happens to be the |
| // same as the required clipping after the row transform (see the spec quoted |
| // above), so we remain compliant with the spec. (In this case, |
| // TransformLoop_C() skips clipping the outputs of row transforms to avoid |
| // duplication of effort.) |
| // |
| // The outputs of column transforms are representable by a signed integer using |
| // Max( BitDepth + 6, 16 ) + 2 - 4 = Max( BitDepth + 4, 14 ) bits of precision, |
| // because the multiplicative factor of inverse identity transforms is at most |
| // 4 (2 bits) and |shift| is always 4. |
| |
| template <typename Residual> |
| void Identity4Row_C(void* dest, const void* source, int8_t shift) { |
| assert(shift == 0 || shift == 1); |
| auto* const dst = static_cast<Residual*>(dest); |
| const auto* const src = static_cast<const Residual*>(source); |
| // If |shift| is 0, |rounding| should be 1 << 11. If |shift| is 1, |rounding| |
| // should be (1 + (1 << 1)) << 11. The following expression works for both |
| // values of |shift|. |
| const int32_t rounding = (1 + (shift << 1)) << 11; |
| for (int i = 0; i < 4; ++i) { |
| // The intermediate value here will have to fit into an int32_t for it to be |
| // bitstream conformant. The multiplication is promoted to int32_t by |
| // defining kIdentity4Multiplier as int32_t. |
| int32_t dst_i = (src[i] * kIdentity4Multiplier + rounding) >> (12 + shift); |
| if (sizeof(Residual) == 2) { |
| dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX); |
| } |
| dst[i] = static_cast<Residual>(dst_i); |
| } |
| } |
| |
| template <typename Residual> |
| void Identity4Column_C(void* dest, const void* source, int8_t /*shift*/) { |
| auto* const dst = static_cast<Residual*>(dest); |
| const auto* const src = static_cast<const Residual*>(source); |
| const int32_t rounding = (1 + (1 << kTransformColumnShift)) << 11; |
| for (int i = 0; i < 4; ++i) { |
| // The intermediate value here will have to fit into an int32_t for it to be |
| // bitstream conformant. The multiplication is promoted to int32_t by |
| // defining kIdentity4Multiplier as int32_t. |
| dst[i] = static_cast<Residual>((src[i] * kIdentity4Multiplier + rounding) >> |
| (12 + kTransformColumnShift)); |
| } |
| } |
| |
| template <typename Residual> |
| void Identity8Row_C(void* dest, const void* source, int8_t shift) { |
| assert(shift == 0 || shift == 1 || shift == 2); |
| auto* const dst = static_cast<Residual*>(dest); |
| const auto* const src = static_cast<const Residual*>(source); |
| for (int i = 0; i < 8; ++i) { |
| int32_t dst_i = RightShiftWithRounding(MultiplyBy2(src[i]), shift); |
| if (sizeof(Residual) == 2) { |
| dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX); |
| } |
| dst[i] = static_cast<Residual>(dst_i); |
| } |
| } |
| |
| template <typename Residual> |
| void Identity8Column_C(void* dest, const void* source, int8_t /*shift*/) { |
| auto* const dst = static_cast<Residual*>(dest); |
| const auto* const src = static_cast<const Residual*>(source); |
| for (int i = 0; i < 8; ++i) { |
| dst[i] = static_cast<Residual>( |
| RightShiftWithRounding(src[i], kTransformColumnShift - 1)); |
| } |
| } |
| |
| template <typename Residual> |
| void Identity16Row_C(void* dest, const void* source, int8_t shift) { |
| assert(shift == 1 || shift == 2); |
| auto* const dst = static_cast<Residual*>(dest); |
| const auto* const src = static_cast<const Residual*>(source); |
| const int32_t rounding = (1 + (1 << shift)) << 11; |
| for (int i = 0; i < 16; ++i) { |
| // The intermediate value here will have to fit into an int32_t for it to be |
| // bitstream conformant. The multiplication is promoted to int32_t by |
| // defining kIdentity16Multiplier as int32_t. |
| int32_t dst_i = (src[i] * kIdentity16Multiplier + rounding) >> (12 + shift); |
| if (sizeof(Residual) == 2) { |
| dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX); |
| } |
| dst[i] = static_cast<Residual>(dst_i); |
| } |
| } |
| |
| template <typename Residual> |
| void Identity16Column_C(void* dest, const void* source, int8_t /*shift*/) { |
| auto* const dst = static_cast<Residual*>(dest); |
| const auto* const src = static_cast<const Residual*>(source); |
| const int32_t rounding = (1 + (1 << kTransformColumnShift)) << 11; |
| for (int i = 0; i < 16; ++i) { |
| // The intermediate value here will have to fit into an int32_t for it to be |
| // bitstream conformant. The multiplication is promoted to int32_t by |
| // defining kIdentity16Multiplier as int32_t. |
| dst[i] = |
| static_cast<Residual>((src[i] * kIdentity16Multiplier + rounding) >> |
| (12 + kTransformColumnShift)); |
| } |
| } |
| |
| template <typename Residual> |
| void Identity32Row_C(void* dest, const void* source, int8_t shift) { |
| assert(shift == 1 || shift == 2); |
| auto* const dst = static_cast<Residual*>(dest); |
| const auto* const src = static_cast<const Residual*>(source); |
| for (int i = 0; i < 32; ++i) { |
| int32_t dst_i = RightShiftWithRounding(MultiplyBy4(src[i]), shift); |
| if (sizeof(Residual) == 2) { |
| dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX); |
| } |
| dst[i] = static_cast<Residual>(dst_i); |
| } |
| } |
| |
| template <typename Residual> |
| void Identity32Column_C(void* dest, const void* source, int8_t /*shift*/) { |
| auto* const dst = static_cast<Residual*>(dest); |
| const auto* const src = static_cast<const Residual*>(source); |
| for (int i = 0; i < 32; ++i) { |
| dst[i] = static_cast<Residual>( |
| RightShiftWithRounding(src[i], kTransformColumnShift - 2)); |
| } |
| } |
| |
| //------------------------------------------------------------------------------ |
| // Walsh Hadamard Transform. |
| |
// Inverse 4-point Walsh Hadamard transform. Every input is first shifted right
// by |shift|. All four inputs are loaded before anything is stored, so |dest|
// and |source| may point to the same buffer.
template <typename Residual>
void Wht4_C(void* dest, const void* source, int8_t shift) {
  auto* const out = static_cast<Residual*>(dest);
  const auto* const in = static_cast<const Residual*>(source);
  Residual a = in[0] >> shift;
  const Residual c = in[1] >> shift;
  Residual d = in[2] >> shift;
  const Residual b = in[3] >> shift;
  a += c;
  d -= b;
  // This signed right shift must be an arithmetic shift.
  const Residual e = (a - d) >> 1;
  const Residual out1 = e - b;
  const Residual out2 = e - c;
  out[1] = out1;
  out[2] = out2;
  out[0] = a - out1;
  out[3] = d + out2;
}
| |
| //------------------------------------------------------------------------------ |
| // row/column transform loop |
| |
// Signature shared by all the 1D inverse transform functions above. The last
// argument is the clamping range for the DCT and ADST functions and a shift
// amount for the identity and WHT functions; the parameter is named |range|
// in either case.
using InverseTransform1DFunc = void (*)(void* dst, const void* src,
                                        int8_t range);
| |
| template <int bitdepth, typename Residual, typename Pixel, |
| Transform1D transform1d_type, |
| InverseTransform1DFunc row_transform1d_func, |
| InverseTransform1DFunc column_transform1d_func = row_transform1d_func> |
| void TransformLoop_C(TransformType tx_type, TransformSize tx_size, |
| void* src_buffer, int start_x, int start_y, |
| void* dst_frame, bool is_row, int non_zero_coeff_count) { |
| constexpr bool lossless = transform1d_type == k1DTransformWht; |
| constexpr bool is_identity = transform1d_type == k1DTransformIdentity; |
| // The transform size of the WHT is always 4x4. Setting tx_width and |
| // tx_height to the constant 4 for the WHT speeds the code up. |
| assert(!lossless || tx_size == kTransformSize4x4); |
| const int tx_width = lossless ? 4 : kTransformWidth[tx_size]; |
| const int tx_height = lossless ? 4 : kTransformHeight[tx_size]; |
| const int tx_width_log2 = kTransformWidthLog2[tx_size]; |
| const int tx_height_log2 = kTransformHeightLog2[tx_size]; |
| auto* frame = reinterpret_cast<Array2DView<Pixel>*>(dst_frame); |
| |
| // Initially this points to the dequantized values. After the transforms are |
| // applied, this buffer contains the residual. |
| Array2DView<Residual> residual(tx_height, tx_width, |
| static_cast<Residual*>(src_buffer)); |
| |
| if (is_row) { |
| // Row transforms need to be done only up to 32 because the rest of the rows |
| // are always all zero if |tx_height| is 64. Otherwise, only process the |
| // rows that have a non zero coefficients. |
| // TODO(slavarnway): Expand to include other possible non_zero_coeff_count |
| // values. |
| const int num_rows = |
| (non_zero_coeff_count == 1) ? 1 : std::min(tx_height, 32); |
| // Row transform. |
| const uint8_t row_shift = lossless ? 0 : kTransformRowShift[tx_size]; |
| // This is the |range| parameter of the InverseTransform1DFunc. For lossy |
| // transforms, this will be equal to the clamping range. |
| const int8_t row_clamp_range = lossless ? 2 : (bitdepth + 8); |
| // If the width:height ratio of the transform size is 2:1 or 1:2, multiply |
| // the input to the row transform by 1 / sqrt(2), which is approximated by |
| // the fraction 2896 / 2^12. |
| const bool should_round = std::abs(tx_width_log2 - tx_height_log2) == 1; |
| |
| for (int i = 0; i < num_rows; ++i) { |
| // If lossless, the transform size is 4x4, so should_round is false. |
| if (!lossless && should_round) { |
| // The last 32 values of every row are always zero if the |tx_width| is |
| // 64. |
| for (int j = 0; j < std::min(tx_width, 32); ++j) { |
| residual[i][j] = RightShiftWithRounding( |
| residual[i][j] * kTransformRowMultiplier, 12); |
| } |
| } |
| // For identity transform, |row_transform1d_func| also performs the |
| // Round2(T[j], rowShift) call in the spec. |
| row_transform1d_func(residual[i], residual[i], |
| is_identity ? row_shift : row_clamp_range); |
| if (!lossless && !is_identity && row_shift > 0) { |
| for (int j = 0; j < tx_width; ++j) { |
| residual[i][j] = RightShiftWithRounding(residual[i][j], row_shift); |
| } |
| } |
| // If Residual is int16_t (which implies bitdepth is 8), we don't need to |
| // clip residual[i][j] to 16 bits. |
| if (sizeof(Residual) > 2) { |
| const Residual intermediate_clamp_max = |
| (1 << (std::max(bitdepth + 6, 16) - 1)) - 1; |
| const Residual intermediate_clamp_min = -intermediate_clamp_max - 1; |
| for (int j = 0; j < tx_width; ++j) { |
| residual[i][j] = Clip3(residual[i][j], intermediate_clamp_min, |
| intermediate_clamp_max); |
| } |
| } |
| } |
| return; |
| } |
| |
| assert(!is_row); |
| constexpr uint8_t column_shift = lossless ? 0 : kTransformColumnShift; |
| // This is the |range| parameter of the InverseTransform1DFunc. For lossy |
| // transforms, this will be equal to the clamping range. |
| const int8_t column_clamp_range = lossless ? 0 : std::max(bitdepth + 6, 16); |
| const bool flip_rows = transform1d_type == k1DTransformAdst && |
| kTransformFlipRowsMask.Contains(tx_type); |
| const bool flip_columns = |
| !lossless && kTransformFlipColumnsMask.Contains(tx_type); |
| const int min_value = 0; |
| const int max_value = (1 << bitdepth) - 1; |
| // Note: 64 is the maximum size of a 1D transform buffer (the largest |
| // transform size is kTransformSize64x64). |
| Residual tx_buffer[64]; |
| for (int j = 0; j < tx_width; ++j) { |
| const int flipped_j = flip_columns ? tx_width - j - 1 : j; |
| for (int i = 0; i < tx_height; ++i) { |
| tx_buffer[i] = residual[i][flipped_j]; |
| } |
| // For identity transform, |column_transform1d_func| also performs the |
| // Round2(T[i], colShift) call in the spec. |
| column_transform1d_func(tx_buffer, tx_buffer, |
| is_identity ? column_shift : column_clamp_range); |
| const int x = start_x + j; |
| for (int i = 0; i < tx_height; ++i) { |
| const int y = start_y + i; |
| const int index = flip_rows ? tx_height - i - 1 : i; |
| Residual residual_value = tx_buffer[index]; |
| if (!lossless && !is_identity) { |
| residual_value = RightShiftWithRounding(residual_value, column_shift); |
| } |
| (*frame)[y][x] = |
| Clip3((*frame)[y][x] + residual_value, min_value, max_value); |
| } |
| } |
| } |
| |
| //------------------------------------------------------------------------------ |
| |
| // Installs the C implementations, instantiated for |bitdepth|, |Residual| and |
| // |Pixel|, into |dsp->inverse_transforms| for the 13 size/type combinations |
| // assigned below. The assignments are grouped by 1D transform size. |
| template <int bitdepth, typename Residual, typename Pixel> |
| void InitAll(Dsp* const dsp) { |
| auto& transforms = dsp->inverse_transforms; |
| |
| // Size 4: Dct, Adst, Identity and Wht. This is the only size with Wht. |
| transforms[k1DTransformSize4][k1DTransformDct] = |
| TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformDct, |
| Dct_C<Residual, 2>>; |
| transforms[k1DTransformSize4][k1DTransformAdst] = |
| TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformAdst, |
| Adst4_C<Residual>>; |
| transforms[k1DTransformSize4][k1DTransformIdentity] = |
| TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformIdentity, |
| Identity4Row_C<Residual>, Identity4Column_C<Residual>>; |
| transforms[k1DTransformSize4][k1DTransformWht] = |
| TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformWht, |
| Wht4_C<Residual>>; |
| |
| // Size 8: Dct, Adst and Identity. |
| transforms[k1DTransformSize8][k1DTransformDct] = |
| TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformDct, |
| Dct_C<Residual, 3>>; |
| transforms[k1DTransformSize8][k1DTransformAdst] = |
| TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformAdst, |
| Adst8_C<Residual>>; |
| transforms[k1DTransformSize8][k1DTransformIdentity] = |
| TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformIdentity, |
| Identity8Row_C<Residual>, Identity8Column_C<Residual>>; |
| |
| // Size 16: Dct, Adst and Identity. This is the largest Adst size. |
| transforms[k1DTransformSize16][k1DTransformDct] = |
| TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformDct, |
| Dct_C<Residual, 4>>; |
| transforms[k1DTransformSize16][k1DTransformAdst] = |
| TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformAdst, |
| Adst16_C<Residual>>; |
| transforms[k1DTransformSize16][k1DTransformIdentity] = |
| TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformIdentity, |
| Identity16Row_C<Residual>, Identity16Column_C<Residual>>; |
| |
| // Size 32: Dct and Identity. This is the largest Identity size. |
| transforms[k1DTransformSize32][k1DTransformDct] = |
| TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformDct, |
| Dct_C<Residual, 5>>; |
| transforms[k1DTransformSize32][k1DTransformIdentity] = |
| TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformIdentity, |
| Identity32Row_C<Residual>, Identity32Column_C<Residual>>; |
| |
| // Size 64: Dct only. This is the largest Dct size. |
| transforms[k1DTransformSize64][k1DTransformDct] = |
| TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformDct, |
| Dct_C<Residual, 6>>; |
| } |
| |
| // Sets up the inverse transform entries of the 8bpp Dsp table. The table is |
| // first cleared to nullptr. Then either every C implementation is installed |
| // (LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS), or only those whose |
| // LIBGAV1_Dsp8bpp_* macro is not defined. |
| void Init8bpp() { |
| Dsp* const dsp = dsp_internal::GetWritableDspTable(8); |
| assert(dsp != nullptr); |
| auto& transforms = dsp->inverse_transforms; |
| // Start from an empty table so that slots not filled below stay nullptr. |
| for (auto& transforms_for_size : transforms) { |
| for (auto& entry : transforms_for_size) { |
| entry = nullptr; |
| } |
| } |
| #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS |
| InitAll<8, int16_t, uint8_t>(dsp); |
| #else // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS |
| // Dct, sizes 4 to 64. |
| #ifndef LIBGAV1_Dsp8bpp_1DTransformSize4_1DTransformDct |
| transforms[k1DTransformSize4][k1DTransformDct] = |
| TransformLoop_C<8, int16_t, uint8_t, k1DTransformDct, Dct_C<int16_t, 2>>; |
| #endif |
| #ifndef LIBGAV1_Dsp8bpp_1DTransformSize8_1DTransformDct |
| transforms[k1DTransformSize8][k1DTransformDct] = |
| TransformLoop_C<8, int16_t, uint8_t, k1DTransformDct, Dct_C<int16_t, 3>>; |
| #endif |
| #ifndef LIBGAV1_Dsp8bpp_1DTransformSize16_1DTransformDct |
| transforms[k1DTransformSize16][k1DTransformDct] = |
| TransformLoop_C<8, int16_t, uint8_t, k1DTransformDct, Dct_C<int16_t, 4>>; |
| #endif |
| #ifndef LIBGAV1_Dsp8bpp_1DTransformSize32_1DTransformDct |
| transforms[k1DTransformSize32][k1DTransformDct] = |
| TransformLoop_C<8, int16_t, uint8_t, k1DTransformDct, Dct_C<int16_t, 5>>; |
| #endif |
| #ifndef LIBGAV1_Dsp8bpp_1DTransformSize64_1DTransformDct |
| transforms[k1DTransformSize64][k1DTransformDct] = |
| TransformLoop_C<8, int16_t, uint8_t, k1DTransformDct, Dct_C<int16_t, 6>>; |
| #endif |
| // Adst, sizes 4 to 16. |
| #ifndef LIBGAV1_Dsp8bpp_1DTransformSize4_1DTransformAdst |
| transforms[k1DTransformSize4][k1DTransformAdst] = |
| TransformLoop_C<8, int16_t, uint8_t, k1DTransformAdst, Adst4_C<int16_t>>; |
| #endif |
| #ifndef LIBGAV1_Dsp8bpp_1DTransformSize8_1DTransformAdst |
| transforms[k1DTransformSize8][k1DTransformAdst] = |
| TransformLoop_C<8, int16_t, uint8_t, k1DTransformAdst, Adst8_C<int16_t>>; |
| #endif |
| #ifndef LIBGAV1_Dsp8bpp_1DTransformSize16_1DTransformAdst |
| transforms[k1DTransformSize16][k1DTransformAdst] = |
| TransformLoop_C<8, int16_t, uint8_t, k1DTransformAdst, Adst16_C<int16_t>>; |
| #endif |
| // Identity, sizes 4 to 32, with separate row and column functions. |
| #ifndef LIBGAV1_Dsp8bpp_1DTransformSize4_1DTransformIdentity |
| transforms[k1DTransformSize4][k1DTransformIdentity] = |
| TransformLoop_C<8, int16_t, uint8_t, k1DTransformIdentity, |
| Identity4Row_C<int16_t>, Identity4Column_C<int16_t>>; |
| #endif |
| #ifndef LIBGAV1_Dsp8bpp_1DTransformSize8_1DTransformIdentity |
| transforms[k1DTransformSize8][k1DTransformIdentity] = |
| TransformLoop_C<8, int16_t, uint8_t, k1DTransformIdentity, |
| Identity8Row_C<int16_t>, Identity8Column_C<int16_t>>; |
| #endif |
| #ifndef LIBGAV1_Dsp8bpp_1DTransformSize16_1DTransformIdentity |
| transforms[k1DTransformSize16][k1DTransformIdentity] = |
| TransformLoop_C<8, int16_t, uint8_t, k1DTransformIdentity, |
| Identity16Row_C<int16_t>, Identity16Column_C<int16_t>>; |
| #endif |
| #ifndef LIBGAV1_Dsp8bpp_1DTransformSize32_1DTransformIdentity |
| transforms[k1DTransformSize32][k1DTransformIdentity] = |
| TransformLoop_C<8, int16_t, uint8_t, k1DTransformIdentity, |
| Identity32Row_C<int16_t>, Identity32Column_C<int16_t>>; |
| #endif |
| // Wht, size 4 only. |
| #ifndef LIBGAV1_Dsp8bpp_1DTransformSize4_1DTransformWht |
| transforms[k1DTransformSize4][k1DTransformWht] = |
| TransformLoop_C<8, int16_t, uint8_t, k1DTransformWht, Wht4_C<int16_t>>; |
| #endif |
| #endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS |
| } |
| |
| #if LIBGAV1_MAX_BITDEPTH >= 10 |
| // Sets up the inverse transform entries of the 10bpp Dsp table. The table is |
| // first cleared to nullptr. Then either every C implementation is installed |
| // (LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS), or only those whose |
| // LIBGAV1_Dsp10bpp_* macro is not defined. |
| void Init10bpp() { |
| Dsp* const dsp = dsp_internal::GetWritableDspTable(10); |
| assert(dsp != nullptr); |
| auto& transforms = dsp->inverse_transforms; |
| // Start from an empty table so that slots not filled below stay nullptr. |
| for (auto& transforms_for_size : transforms) { |
| for (auto& entry : transforms_for_size) { |
| entry = nullptr; |
| } |
| } |
| #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS |
| InitAll<10, int32_t, uint16_t>(dsp); |
| #else // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS |
| // Dct, sizes 4 to 64. |
| #ifndef LIBGAV1_Dsp10bpp_1DTransformSize4_1DTransformDct |
| transforms[k1DTransformSize4][k1DTransformDct] = |
| TransformLoop_C<10, int32_t, uint16_t, k1DTransformDct, |
| Dct_C<int32_t, 2>>; |
| #endif |
| #ifndef LIBGAV1_Dsp10bpp_1DTransformSize8_1DTransformDct |
| transforms[k1DTransformSize8][k1DTransformDct] = |
| TransformLoop_C<10, int32_t, uint16_t, k1DTransformDct, |
| Dct_C<int32_t, 3>>; |
| #endif |
| #ifndef LIBGAV1_Dsp10bpp_1DTransformSize16_1DTransformDct |
| transforms[k1DTransformSize16][k1DTransformDct] = |
| TransformLoop_C<10, int32_t, uint16_t, k1DTransformDct, |
| Dct_C<int32_t, 4>>; |
| #endif |
| #ifndef LIBGAV1_Dsp10bpp_1DTransformSize32_1DTransformDct |
| transforms[k1DTransformSize32][k1DTransformDct] = |
| TransformLoop_C<10, int32_t, uint16_t, k1DTransformDct, |
| Dct_C<int32_t, 5>>; |
| #endif |
| #ifndef LIBGAV1_Dsp10bpp_1DTransformSize64_1DTransformDct |
| transforms[k1DTransformSize64][k1DTransformDct] = |
| TransformLoop_C<10, int32_t, uint16_t, k1DTransformDct, |
| Dct_C<int32_t, 6>>; |
| #endif |
| // Adst, sizes 4 to 16. |
| #ifndef LIBGAV1_Dsp10bpp_1DTransformSize4_1DTransformAdst |
| transforms[k1DTransformSize4][k1DTransformAdst] = |
| TransformLoop_C<10, int32_t, uint16_t, k1DTransformAdst, |
| Adst4_C<int32_t>>; |
| #endif |
| #ifndef LIBGAV1_Dsp10bpp_1DTransformSize8_1DTransformAdst |
| transforms[k1DTransformSize8][k1DTransformAdst] = |
| TransformLoop_C<10, int32_t, uint16_t, k1DTransformAdst, |
| Adst8_C<int32_t>>; |
| #endif |
| #ifndef LIBGAV1_Dsp10bpp_1DTransformSize16_1DTransformAdst |
| transforms[k1DTransformSize16][k1DTransformAdst] = |
| TransformLoop_C<10, int32_t, uint16_t, k1DTransformAdst, |
| Adst16_C<int32_t>>; |
| #endif |
| // Identity, sizes 4 to 32, with separate row and column functions. |
| #ifndef LIBGAV1_Dsp10bpp_1DTransformSize4_1DTransformIdentity |
| transforms[k1DTransformSize4][k1DTransformIdentity] = |
| TransformLoop_C<10, int32_t, uint16_t, k1DTransformIdentity, |
| Identity4Row_C<int32_t>, Identity4Column_C<int32_t>>; |
| #endif |
| #ifndef LIBGAV1_Dsp10bpp_1DTransformSize8_1DTransformIdentity |
| transforms[k1DTransformSize8][k1DTransformIdentity] = |
| TransformLoop_C<10, int32_t, uint16_t, k1DTransformIdentity, |
| Identity8Row_C<int32_t>, Identity8Column_C<int32_t>>; |
| #endif |
| #ifndef LIBGAV1_Dsp10bpp_1DTransformSize16_1DTransformIdentity |
| transforms[k1DTransformSize16][k1DTransformIdentity] = |
| TransformLoop_C<10, int32_t, uint16_t, k1DTransformIdentity, |
| Identity16Row_C<int32_t>, Identity16Column_C<int32_t>>; |
| #endif |
| #ifndef LIBGAV1_Dsp10bpp_1DTransformSize32_1DTransformIdentity |
| transforms[k1DTransformSize32][k1DTransformIdentity] = |
| TransformLoop_C<10, int32_t, uint16_t, k1DTransformIdentity, |
| Identity32Row_C<int32_t>, Identity32Column_C<int32_t>>; |
| #endif |
| // Wht, size 4 only. |
| #ifndef LIBGAV1_Dsp10bpp_1DTransformSize4_1DTransformWht |
| transforms[k1DTransformSize4][k1DTransformWht] = |
| TransformLoop_C<10, int32_t, uint16_t, k1DTransformWht, Wht4_C<int32_t>>; |
| #endif |
| #endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS |
| } |
| #endif // LIBGAV1_MAX_BITDEPTH >= 10 |
| |
| } // namespace |
| |
| // Registers the C inverse transform functions: always for 8bpp, and also for |
| // 10bpp when LIBGAV1_MAX_BITDEPTH >= 10. |
| void InverseTransformInit_C() { |
| // Reference the file-local helpers that may end up unused, depending on |
| // which optimizations are available. |
| static_cast<void>(RangeCheckValue); |
| static_cast<void>(kBitReverseLookup); |
| |
| Init8bpp(); |
| #if LIBGAV1_MAX_BITDEPTH >= 10 |
| Init10bpp(); |
| #endif |
| } |
| |
| } // namespace dsp |
| } // namespace libgav1 |