| // Copyright 2019 The libgav1 Authors |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #include "src/dsp/cdef.h" |
| |
| #include <algorithm> |
| #include <cassert> |
| #include <cstddef> |
| #include <cstdint> |
| #include <cstring> |
| |
| #include "src/dsp/dsp.h" |
| #include "src/utils/common.h" |
| |
| namespace libgav1 { |
| namespace dsp { |
| namespace { |
| |
// Marker value in the padded uint16_t source buffer. CdefFilter_C() skips any
// tap whose sample equals this value.
constexpr uint16_t kCdefLargeValue = 30000;

// kDivisionTable[n] == 840 / n for n in [1, 8]. CdefDirection_C() uses the
// entries to weight squared partial sums; entry 0 is never read in this file.
constexpr int16_t kDivisionTable[9] = {
    0, 840, 420, 280, 210, 168, 140, 120, 105,
};

// Primary tap weights, indexed by [(primary_strength >> coeff_shift) & 1][k].
constexpr uint8_t kPrimaryTaps[2][2] = {
    {4, 2},
    {3, 3},
};

// Secondary tap weights, indexed the same way as kPrimaryTaps.
constexpr uint8_t kSecondaryTaps[2][2] = {
    {2, 1},
    {2, 1},
};

// Tap offsets indexed by [direction][k]. Each innermost pair is {dy, dx}.
constexpr int8_t kCdefDirections[8][2][2] = {
    {{-1, 1}, {-2, 2}},  // direction 0
    {{0, 1}, {-1, 2}},   // direction 1
    {{0, 1}, {0, 2}},    // direction 2
    {{0, 1}, {1, 2}},    // direction 3
    {{1, 1}, {2, 2}},    // direction 4
    {{1, 0}, {2, 1}},    // direction 5
    {{1, 0}, {2, 0}},    // direction 6
    {{1, 0}, {2, -1}},   // direction 7
};
| |
| int Constrain(int diff, int threshold, int damping) { |
| if (threshold == 0) return 0; |
| damping = std::max(0, damping - FloorLog2(threshold)); |
| const int sign = (diff < 0) ? -1 : 1; |
| return sign * |
| Clip3(threshold - (std::abs(diff) >> damping), 0, std::abs(diff)); |
| } |
| |
// Returns |x| multiplied by itself.
int32_t Square(int32_t x) {
  const int32_t product = x * x;
  return product;
}
| |
| template <int bitdepth, typename Pixel> |
| void CdefDirection_C(const void* const source, ptrdiff_t stride, |
| int* const direction, int* const variance) { |
| assert(direction != nullptr); |
| assert(variance != nullptr); |
| const auto* src = static_cast<const Pixel*>(source); |
| stride /= sizeof(Pixel); |
| int32_t cost[8] = {}; |
| // |partial| does not have to be int32_t for 8bpp. int16_t will suffice. We |
| // use int32_t to keep it simple since |cost| will have to be int32_t. |
| int32_t partial[8][15] = {}; |
| for (int i = 0; i < 8; ++i) { |
| for (int j = 0; j < 8; ++j) { |
| const int x = (src[j] >> (bitdepth - 8)) - 128; |
| partial[0][i + j] += x; |
| partial[1][i + j / 2] += x; |
| partial[2][i] += x; |
| partial[3][3 + i - j / 2] += x; |
| partial[4][7 + i - j] += x; |
| partial[5][3 - i / 2 + j] += x; |
| partial[6][j] += x; |
| partial[7][i / 2 + j] += x; |
| } |
| src += stride; |
| } |
| for (int i = 0; i < 8; ++i) { |
| cost[2] += Square(partial[2][i]); |
| cost[6] += Square(partial[6][i]); |
| } |
| cost[2] *= kDivisionTable[8]; |
| cost[6] *= kDivisionTable[8]; |
| for (int i = 0; i < 7; ++i) { |
| cost[0] += (Square(partial[0][i]) + Square(partial[0][14 - i])) * |
| kDivisionTable[i + 1]; |
| cost[4] += (Square(partial[4][i]) + Square(partial[4][14 - i])) * |
| kDivisionTable[i + 1]; |
| } |
| cost[0] += Square(partial[0][7]) * kDivisionTable[8]; |
| cost[4] += Square(partial[4][7]) * kDivisionTable[8]; |
| for (int i = 1; i < 8; i += 2) { |
| for (int j = 0; j < 5; ++j) { |
| cost[i] += Square(partial[i][3 + j]); |
| } |
| cost[i] *= kDivisionTable[8]; |
| for (int j = 0; j < 3; ++j) { |
| cost[i] += (Square(partial[i][j]) + Square(partial[i][10 - j])) * |
| kDivisionTable[2 * j + 2]; |
| } |
| } |
| int32_t best_cost = 0; |
| *direction = 0; |
| for (int i = 0; i < 8; ++i) { |
| if (cost[i] > best_cost) { |
| best_cost = cost[i]; |
| *direction = i; |
| } |
| } |
| *variance = (best_cost - cost[(*direction + 4) & 7]) >> 10; |
| } |
| |
| // Filters the source block. It doesn't check whether the candidate pixel is |
| // inside the frame. However it requires the source input to be padded with a |
| // constant large value if at the boundary. And the input should be uint16_t. |
| template <int bitdepth, typename Pixel> |
| void CdefFilter_C(const void* const source, const ptrdiff_t source_stride, |
| const int rows4x4, const int columns4x4, const int curr_x, |
| const int curr_y, const int subsampling_x, |
| const int subsampling_y, const int primary_strength, |
| const int secondary_strength, const int damping, |
| const int direction, void* const dest, |
| const ptrdiff_t dest_stride) { |
| const int coeff_shift = bitdepth - 8; |
| const int plane_width = MultiplyBy4(columns4x4) >> subsampling_x; |
| const int plane_height = MultiplyBy4(rows4x4) >> subsampling_y; |
| const int block_width = std::min(8 >> subsampling_x, plane_width - curr_x); |
| const int block_height = std::min(8 >> subsampling_y, plane_height - curr_y); |
| const auto* src = static_cast<const uint16_t*>(source); |
| auto* dst = static_cast<Pixel*>(dest); |
| const ptrdiff_t dst_stride = dest_stride / sizeof(Pixel); |
| int y = 0; |
| do { |
| int x = 0; |
| do { |
| int16_t sum = 0; |
| const uint16_t pixel_value = src[x]; |
| uint16_t max_value = pixel_value; |
| uint16_t min_value = pixel_value; |
| for (int k = 0; k < 2; ++k) { |
| const int signs[] = {-1, 1}; |
| for (const int& sign : signs) { |
| int dy = sign * kCdefDirections[direction][k][0]; |
| int dx = sign * kCdefDirections[direction][k][1]; |
| uint16_t value = src[dy * source_stride + dx + x]; |
| // Note: the summation can ignore the condition check in SIMD |
| // implementation, because Constrain() will return 0 when |
| // value == kCdefLargeValue. |
| if (value != kCdefLargeValue) { |
| sum += Constrain(value - pixel_value, primary_strength, damping) * |
| kPrimaryTaps[(primary_strength >> coeff_shift) & 1][k]; |
| max_value = std::max(value, max_value); |
| min_value = std::min(value, min_value); |
| } |
| const int offsets[] = {-2, 2}; |
| for (const int& offset : offsets) { |
| dy = sign * kCdefDirections[(direction + offset) & 7][k][0]; |
| dx = sign * kCdefDirections[(direction + offset) & 7][k][1]; |
| value = src[dy * source_stride + dx + x]; |
| // Note: the summation can ignore the condition check in SIMD |
| // implementation. |
| if (value != kCdefLargeValue) { |
| sum += |
| Constrain(value - pixel_value, secondary_strength, damping) * |
| kSecondaryTaps[(primary_strength >> coeff_shift) & 1][k]; |
| max_value = std::max(value, max_value); |
| min_value = std::min(value, min_value); |
| } |
| } |
| } |
| } |
| |
| dst[x] = static_cast<Pixel>(Clip3( |
| pixel_value + ((8 + sum - (sum < 0)) >> 4), min_value, max_value)); |
| } while (++x < block_width); |
| |
| src += source_stride; |
| dst += dst_stride; |
| } while (++y < block_height); |
| } |
| |
| void Init8bpp() { |
| Dsp* const dsp = dsp_internal::GetWritableDspTable(8); |
| assert(dsp != nullptr); |
| #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS |
| dsp->cdef_direction = CdefDirection_C<8, uint8_t>; |
| dsp->cdef_filter = CdefFilter_C<8, uint8_t>; |
| #else // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS |
| static_cast<void>(dsp); |
| #ifndef LIBGAV1_Dsp8bpp_CdefDirection |
| dsp->cdef_direction = CdefDirection_C<8, uint8_t>; |
| #endif |
| #ifndef LIBGAV1_Dsp8bpp_CdefFilter |
| dsp->cdef_filter = CdefFilter_C<8, uint8_t>; |
| #endif |
| #endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS |
| } |
| |
#if LIBGAV1_MAX_BITDEPTH >= 10
// Stores the C CDEF functions in the writable Dsp table for bitdepth 10.
//
// An entry is assigned when LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS is nonzero or
// when the matching LIBGAV1_Dsp10bpp_* macro is not defined (presumably those
// macros are defined elsewhere when another implementation of the function is
// in use -- confirm against the headers that define them).
void Init10bpp() {
  Dsp* const dsp = dsp_internal::GetWritableDspTable(10);
  assert(dsp != nullptr);
  // Keeps |dsp| referenced even if neither assignment below is compiled in.
  static_cast<void>(dsp);
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || !defined(LIBGAV1_Dsp10bpp_CdefDirection)
  dsp->cdef_direction = CdefDirection_C<10, uint16_t>;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || !defined(LIBGAV1_Dsp10bpp_CdefFilter)
  dsp->cdef_filter = CdefFilter_C<10, uint16_t>;
#endif
}
#endif  // LIBGAV1_MAX_BITDEPTH >= 10
| |
| } // namespace |
| |
// Runs the per-bitdepth initializers for the C CDEF functions. The 8bpp
// initializer always runs; the 10bpp initializer is compiled in and run only
// when LIBGAV1_MAX_BITDEPTH is at least 10.
void CdefInit_C() {
  Init8bpp();
#if LIBGAV1_MAX_BITDEPTH >= 10
  Init10bpp();
#endif
}
| |
| } // namespace dsp |
| } // namespace libgav1 |