| #include "src/dsp/convolve.h" |
| |
| #include <cassert> |
| #include <cstddef> |
| #include <cstdint> |
| #include <cstdlib> |
| #include <cstring> |
| |
| #include "src/dsp/constants.h" |
| #include "src/dsp/dsp.h" |
| #include "src/utils/common.h" |
| #include "src/utils/constants.h" |
| |
| namespace libgav1 { |
| namespace dsp { |
| namespace { |
| |
// Mask applied to a shifted sub-pixel position to obtain the filter id that
// selects a row of kSubPixelFilters.
constexpr int kSubPixelMask = (1 << kSubPixelBits) - 1;
// Number of columns the source pointer is moved back before horizontal
// filtering in the non-scaled kernels.
constexpr int kHorizontalOffset = 3;
// Number of rows the source pointer is moved back before vertical filtering in
// the non-scaled kernels.
constexpr int kVerticalOffset = 3;
| |
| int GetFilterIndex(const int filter_index, const int length) { |
| if (length <= 4) { |
| if (filter_index == kInterpolationFilterEightTap || |
| filter_index == kInterpolationFilterEightTapSharp) { |
| return 4; |
| } |
| if (filter_index == kInterpolationFilterEightTapSmooth) { |
| return 5; |
| } |
| } |
| return filter_index; |
| } |
| |
| template <int bitdepth, typename Pixel> |
| void ConvolveScale2D_C( |
| const void* const reference, const ptrdiff_t reference_stride, |
| const int horizontal_filter_index, const int vertical_filter_index, |
| const int inter_round_bits_vertical, const int subpixel_x, |
| const int subpixel_y, const int step_x, const int step_y, const int width, |
| const int height, void* prediction, const ptrdiff_t pred_stride) { |
| constexpr int kRoundBitsHorizontal = (bitdepth == 12) |
| ? kInterRoundBitsHorizontal12bpp |
| : kInterRoundBitsHorizontal; |
| const int intermediate_height = |
| (((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >> |
| kScaleSubPixelBits) + |
| kSubPixelTaps; |
| // The output of the horizontal filter, i.e. the intermediate_result, is |
| // guaranteed to fit in int16_t. |
| int16_t intermediate_result[kMaxSuperBlockSizeInPixels * |
| (2 * kMaxSuperBlockSizeInPixels + 8)]; |
| const int intermediate_stride = kMaxSuperBlockSizeInPixels; |
| const int single_round_offset = (1 << bitdepth) + (1 << (bitdepth - 1)); |
| const int max_pixel_value = (1 << bitdepth) - 1; |
| |
| // Horizontal filter. |
| // Filter types used for width <= 4 are different from those for width > 4. |
| // When width > 4, the valid filter index range is always [0, 3]. |
| // When width <= 4, the valid filter index range is always [4, 5]. |
| // Similarly for height. |
| int filter_index = GetFilterIndex(horizontal_filter_index, width); |
| int16_t* intermediate = intermediate_result; |
| const auto* src = static_cast<const Pixel*>(reference); |
| const ptrdiff_t src_stride = reference_stride / sizeof(Pixel); |
| auto* dest = static_cast<Pixel*>(prediction); |
| const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel); |
| const int ref_x = subpixel_x >> kScaleSubPixelBits; |
| // Note: assume the input src is already aligned to the correct start |
| // position. |
| int y = 0; |
| do { |
| int p = subpixel_x; |
| int x = 0; |
| do { |
| // An offset to guarantee the sum is non negative. |
| int sum = 1 << (bitdepth + kFilterBits - 1); |
| const Pixel* src_x = &src[(p >> kScaleSubPixelBits) - ref_x]; |
| const int filter_id = (p >> 6) & kSubPixelMask; |
| for (int k = 0; k < kSubPixelTaps; ++k) { |
| sum += kSubPixelFilters[filter_index][filter_id][k] * src_x[k]; |
| } |
| assert(sum >= 0 && sum < (1 << (bitdepth + kFilterBits + 1))); |
| intermediate[x] = static_cast<int16_t>( |
| RightShiftWithRounding(sum, kRoundBitsHorizontal)); |
| p += step_x; |
| } while (++x < width); |
| |
| src += src_stride; |
| intermediate += intermediate_stride; |
| } while (++y < intermediate_height); |
| |
| // Vertical filter. |
| filter_index = GetFilterIndex(vertical_filter_index, height); |
| intermediate = intermediate_result; |
| const int offset_bits = bitdepth + 2 * kFilterBits - kRoundBitsHorizontal; |
| int p = subpixel_y & 1023; |
| y = 0; |
| do { |
| const int filter_id = (p >> 6) & kSubPixelMask; |
| int x = 0; |
| do { |
| // An offset to guarantee the sum is non negative. |
| int sum = 1 << offset_bits; |
| for (int k = 0; k < kSubPixelTaps; ++k) { |
| sum += |
| kSubPixelFilters[filter_index][filter_id][k] * |
| intermediate[((p >> kScaleSubPixelBits) + k) * intermediate_stride + |
| x]; |
| } |
| assert(sum >= 0 && sum < (1 << (offset_bits + 2))); |
| dest[x] = static_cast<Pixel>( |
| Clip3(RightShiftWithRounding(sum, inter_round_bits_vertical) - |
| single_round_offset, |
| 0, max_pixel_value)); |
| } while (++x < width); |
| |
| dest += dest_stride; |
| p += step_y; |
| } while (++y < height); |
| } |
| |
| template <int bitdepth, typename Pixel> |
| void ConvolveCompoundScale2D_C( |
| const void* const reference, const ptrdiff_t reference_stride, |
| const int horizontal_filter_index, const int vertical_filter_index, |
| const int inter_round_bits_vertical, const int subpixel_x, |
| const int subpixel_y, const int step_x, const int step_y, const int width, |
| const int height, void* prediction, const ptrdiff_t pred_stride) { |
| constexpr int kRoundBitsHorizontal = (bitdepth == 12) |
| ? kInterRoundBitsHorizontal12bpp |
| : kInterRoundBitsHorizontal; |
| const int intermediate_height = |
| (((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >> |
| kScaleSubPixelBits) + |
| kSubPixelTaps; |
| // The output of the horizontal filter, i.e. the intermediate_result, is |
| // guaranteed to fit in int16_t. |
| int16_t intermediate_result[kMaxSuperBlockSizeInPixels * |
| (2 * kMaxSuperBlockSizeInPixels + 8)]; |
| const int intermediate_stride = kMaxSuperBlockSizeInPixels; |
| |
| // Horizontal filter. |
| // Filter types used for width <= 4 are different from those for width > 4. |
| // When width > 4, the valid filter index range is always [0, 3]. |
| // When width <= 4, the valid filter index range is always [4, 5]. |
| // Similarly for height. |
| int filter_index = GetFilterIndex(horizontal_filter_index, width); |
| int16_t* intermediate = intermediate_result; |
| const auto* src = static_cast<const Pixel*>(reference); |
| const ptrdiff_t src_stride = reference_stride / sizeof(Pixel); |
| auto* dest = static_cast<uint16_t*>(prediction); |
| const int ref_x = subpixel_x >> kScaleSubPixelBits; |
| // Note: assume the input src is already aligned to the correct start |
| // position. |
| int y = 0; |
| do { |
| int p = subpixel_x; |
| int x = 0; |
| do { |
| // An offset to guarantee the sum is non negative. |
| int sum = 1 << (bitdepth + kFilterBits - 1); |
| const Pixel* src_x = &src[(p >> kScaleSubPixelBits) - ref_x]; |
| const int filter_id = (p >> 6) & kSubPixelMask; |
| for (int k = 0; k < kSubPixelTaps; ++k) { |
| sum += kSubPixelFilters[filter_index][filter_id][k] * src_x[k]; |
| } |
| assert(sum >= 0 && sum < (1 << (bitdepth + kFilterBits + 1))); |
| intermediate[x] = static_cast<int16_t>( |
| RightShiftWithRounding(sum, kRoundBitsHorizontal)); |
| p += step_x; |
| } while (++x < width); |
| |
| src += src_stride; |
| intermediate += intermediate_stride; |
| } while (++y < intermediate_height); |
| |
| // Vertical filter. |
| filter_index = GetFilterIndex(vertical_filter_index, height); |
| intermediate = intermediate_result; |
| const int offset_bits = bitdepth + 2 * kFilterBits - kRoundBitsHorizontal; |
| int p = subpixel_y & 1023; |
| y = 0; |
| do { |
| const int filter_id = (p >> 6) & kSubPixelMask; |
| int x = 0; |
| do { |
| // An offset to guarantee the sum is non negative. |
| int sum = 1 << offset_bits; |
| for (int k = 0; k < kSubPixelTaps; ++k) { |
| sum += |
| kSubPixelFilters[filter_index][filter_id][k] * |
| intermediate[((p >> kScaleSubPixelBits) + k) * intermediate_stride + |
| x]; |
| } |
| assert(sum >= 0 && sum < (1 << (offset_bits + 2))); |
| dest[x] = static_cast<uint16_t>( |
| RightShiftWithRounding(sum, inter_round_bits_vertical)); |
| } while (++x < width); |
| |
| dest += pred_stride; |
| p += step_y; |
| } while (++y < height); |
| } |
| |
| template <int bitdepth, typename Pixel> |
| void ConvolveCompound2D_C(const void* const reference, |
| const ptrdiff_t reference_stride, |
| const int horizontal_filter_index, |
| const int vertical_filter_index, |
| const int inter_round_bits_vertical, |
| const int subpixel_x, const int subpixel_y, |
| const int /*step_x*/, const int /*step_y*/, |
| const int width, const int height, void* prediction, |
| const ptrdiff_t pred_stride) { |
| constexpr int kRoundBitsHorizontal = (bitdepth == 12) |
| ? kInterRoundBitsHorizontal12bpp |
| : kInterRoundBitsHorizontal; |
| const int intermediate_height = height + kSubPixelTaps - 1; |
| // The output of the horizontal filter, i.e. the intermediate_result, is |
| // guaranteed to fit in int16_t. |
| int16_t intermediate_result[kMaxSuperBlockSizeInPixels * |
| (kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1)]; |
| const int intermediate_stride = kMaxSuperBlockSizeInPixels; |
| |
| // Horizontal filter. |
| // Filter types used for width <= 4 are different from those for width > 4. |
| // When width > 4, the valid filter index range is always [0, 3]. |
| // When width <= 4, the valid filter index range is always [4, 5]. |
| // Similarly for height. |
| int filter_index = GetFilterIndex(horizontal_filter_index, width); |
| int16_t* intermediate = intermediate_result; |
| const ptrdiff_t src_stride = reference_stride / sizeof(Pixel); |
| const auto* src = static_cast<const Pixel*>(reference) - |
| kVerticalOffset * src_stride - kHorizontalOffset; |
| auto* dest = static_cast<uint16_t*>(prediction); |
| int filter_id = (subpixel_x >> 6) & kSubPixelMask; |
| int y = 0; |
| do { |
| int x = 0; |
| do { |
| // An offset to guarantee the sum is non negative. |
| int sum = 1 << (bitdepth + kFilterBits - 1); |
| for (int k = 0; k < kSubPixelTaps; ++k) { |
| sum += kSubPixelFilters[filter_index][filter_id][k] * src[x + k]; |
| } |
| assert(sum >= 0 && sum < (1 << (bitdepth + kFilterBits + 1))); |
| intermediate[x] = static_cast<int16_t>( |
| RightShiftWithRounding(sum, kRoundBitsHorizontal)); |
| } while (++x < width); |
| |
| src += src_stride; |
| intermediate += intermediate_stride; |
| } while (++y < intermediate_height); |
| |
| // Vertical filter. |
| filter_index = GetFilterIndex(vertical_filter_index, height); |
| intermediate = intermediate_result; |
| filter_id = ((subpixel_y & 1023) >> 6) & kSubPixelMask; |
| const int offset_bits = bitdepth + 2 * kFilterBits - kRoundBitsHorizontal; |
| y = 0; |
| do { |
| int x = 0; |
| do { |
| // An offset to guarantee the sum is non negative. |
| int sum = 1 << offset_bits; |
| for (int k = 0; k < kSubPixelTaps; ++k) { |
| sum += kSubPixelFilters[filter_index][filter_id][k] * |
| intermediate[k * intermediate_stride + x]; |
| } |
| assert(sum >= 0 && sum < (1 << (offset_bits + 2))); |
| dest[x] = static_cast<uint16_t>( |
| RightShiftWithRounding(sum, inter_round_bits_vertical)); |
| } while (++x < width); |
| |
| dest += pred_stride; |
| intermediate += intermediate_stride; |
| } while (++y < height); |
| } |
| |
| // This function is a simplified version of ConvolveCompound2D_C. |
| // It is called when it is single prediction mode, where both horizontal and |
| // vertical filtering are required. |
| // The output is the single prediction of the block, clipped to valid pixel |
| // range. |
| template <int bitdepth, typename Pixel> |
| void Convolve2D_C(const void* const reference, const ptrdiff_t reference_stride, |
| const int horizontal_filter_index, |
| const int vertical_filter_index, |
| const int inter_round_bits_vertical, const int subpixel_x, |
| const int subpixel_y, const int /*step_x*/, |
| const int /*step_y*/, const int width, const int height, |
| void* prediction, const ptrdiff_t pred_stride) { |
| constexpr int kRoundBitsHorizontal = (bitdepth == 12) |
| ? kInterRoundBitsHorizontal12bpp |
| : kInterRoundBitsHorizontal; |
| const int intermediate_height = height + kSubPixelTaps - 1; |
| // The output of the horizontal filter, i.e. the intermediate_result, is |
| // guaranteed to fit in int16_t. |
| int16_t intermediate_result[kMaxSuperBlockSizeInPixels * |
| (kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1)]; |
| const int intermediate_stride = kMaxSuperBlockSizeInPixels; |
| const int single_round_offset = (1 << bitdepth) + (1 << (bitdepth - 1)); |
| const int max_pixel_value = (1 << bitdepth) - 1; |
| |
| // Horizontal filter. |
| // Filter types used for width <= 4 are different from those for width > 4. |
| // When width > 4, the valid filter index range is always [0, 3]. |
| // When width <= 4, the valid filter index range is always [4, 5]. |
| // Similarly for height. |
| int filter_index = GetFilterIndex(horizontal_filter_index, width); |
| int16_t* intermediate = intermediate_result; |
| const ptrdiff_t src_stride = reference_stride / sizeof(Pixel); |
| const auto* src = static_cast<const Pixel*>(reference) - |
| kVerticalOffset * src_stride - kHorizontalOffset; |
| auto* dest = static_cast<Pixel*>(prediction); |
| const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel); |
| int filter_id = (subpixel_x >> 6) & kSubPixelMask; |
| int y = 0; |
| do { |
| int x = 0; |
| do { |
| // An offset to guarantee the sum is non negative. |
| int sum = 1 << (bitdepth + kFilterBits - 1); |
| for (int k = 0; k < kSubPixelTaps; ++k) { |
| sum += kSubPixelFilters[filter_index][filter_id][k] * src[x + k]; |
| } |
| assert(sum >= 0 && sum < (1 << (bitdepth + kFilterBits + 1))); |
| intermediate[x] = static_cast<int16_t>( |
| RightShiftWithRounding(sum, kRoundBitsHorizontal)); |
| } while (++x < width); |
| |
| src += src_stride; |
| intermediate += intermediate_stride; |
| } while (++y < intermediate_height); |
| |
| // Vertical filter. |
| filter_index = GetFilterIndex(vertical_filter_index, height); |
| intermediate = intermediate_result; |
| filter_id = ((subpixel_y & 1023) >> 6) & kSubPixelMask; |
| const int offset_bits = bitdepth + 2 * kFilterBits - kRoundBitsHorizontal; |
| y = 0; |
| do { |
| int x = 0; |
| do { |
| // An offset to guarantee the sum is non negative. |
| int sum = 1 << offset_bits; |
| for (int k = 0; k < kSubPixelTaps; ++k) { |
| sum += kSubPixelFilters[filter_index][filter_id][k] * |
| intermediate[k * intermediate_stride + x]; |
| } |
| assert(sum >= 0 && sum < (1 << (offset_bits + 2))); |
| dest[x] = static_cast<Pixel>( |
| Clip3(RightShiftWithRounding(sum, inter_round_bits_vertical) - |
| single_round_offset, |
| 0, max_pixel_value)); |
| } while (++x < width); |
| |
| dest += dest_stride; |
| intermediate += intermediate_stride; |
| } while (++y < height); |
| } |
| |
| // This function is a simplified version of Convolve2D_C. |
| // It is called when it is single prediction mode, where only horizontal |
| // filtering is required. |
| // The output is the single prediction of the block, clipped to valid pixel |
| // range. |
| template <int bitdepth, typename Pixel> |
| void ConvolveHorizontal_C(const void* const reference, |
| const ptrdiff_t reference_stride, |
| const int horizontal_filter_index, |
| const int /*vertical_filter_index*/, |
| const int /*inter_round_bits_vertical*/, |
| const int subpixel_x, const int /*subpixel_y*/, |
| const int /*step_x*/, const int /*step_y*/, |
| const int width, const int height, void* prediction, |
| const ptrdiff_t pred_stride) { |
| constexpr int kRoundBitsHorizontal = (bitdepth == 12) |
| ? kInterRoundBitsHorizontal12bpp |
| : kInterRoundBitsHorizontal; |
| const int filter_index = GetFilterIndex(horizontal_filter_index, width); |
| const int bits = kFilterBits - kRoundBitsHorizontal; |
| const auto* src = static_cast<const Pixel*>(reference) - kHorizontalOffset; |
| const ptrdiff_t src_stride = reference_stride / sizeof(Pixel); |
| auto* dest = static_cast<Pixel*>(prediction); |
| const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel); |
| const int filter_id = (subpixel_x >> 6) & kSubPixelMask; |
| const int max_pixel_value = (1 << bitdepth) - 1; |
| int y = 0; |
| do { |
| int x = 0; |
| do { |
| int sum = 0; |
| for (int k = 0; k < kSubPixelTaps; ++k) { |
| sum += kSubPixelFilters[filter_index][filter_id][k] * src[x + k]; |
| } |
| sum = RightShiftWithRounding(sum, kRoundBitsHorizontal); |
| dest[x] = static_cast<Pixel>( |
| Clip3(RightShiftWithRounding(sum, bits), 0, max_pixel_value)); |
| } while (++x < width); |
| |
| src += src_stride; |
| dest += dest_stride; |
| } while (++y < height); |
| } |
| |
| // This function is a simplified version of Convolve2D_C. |
| // It is called when it is single prediction mode, where only vertical |
| // filtering is required. |
| // The output is the single prediction of the block, clipped to valid pixel |
| // range. |
| template <int bitdepth, typename Pixel> |
| void ConvolveVertical_C(const void* const reference, |
| const ptrdiff_t reference_stride, |
| const int /*horizontal_filter_index*/, |
| const int vertical_filter_index, |
| const int /*inter_round_bits_vertical*/, |
| const int /*subpixel_x*/, const int subpixel_y, |
| const int /*step_x*/, const int /*step_y*/, |
| const int width, const int height, void* prediction, |
| const ptrdiff_t pred_stride) { |
| const int filter_index = GetFilterIndex(vertical_filter_index, height); |
| const ptrdiff_t src_stride = reference_stride / sizeof(Pixel); |
| const auto* src = |
| static_cast<const Pixel*>(reference) - kVerticalOffset * src_stride; |
| auto* dest = static_cast<Pixel*>(prediction); |
| const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel); |
| const int filter_id = (subpixel_y >> 6) & kSubPixelMask; |
| // First filter is always a copy. |
| if (filter_id == 0) { |
| // Move |src| down the actual values and not the start of the context. |
| src = static_cast<const Pixel*>(reference); |
| int y = 0; |
| do { |
| memcpy(dest, src, width * sizeof(src[0])); |
| src += src_stride; |
| dest += dest_stride; |
| } while (++y < height); |
| return; |
| } |
| const int max_pixel_value = (1 << bitdepth) - 1; |
| int y = 0; |
| do { |
| int x = 0; |
| do { |
| int sum = 0; |
| for (int k = 0; k < kSubPixelTaps; ++k) { |
| sum += kSubPixelFilters[filter_index][filter_id][k] * |
| src[k * src_stride + x]; |
| } |
| dest[x] = static_cast<Pixel>( |
| Clip3(RightShiftWithRounding(sum, kFilterBits), 0, max_pixel_value)); |
| } while (++x < width); |
| |
| src += src_stride; |
| dest += dest_stride; |
| } while (++y < height); |
| } |
| |
// Copies a |width| x |height| block of Pixel samples from |reference| to
// |prediction| one row at a time. Both strides are applied to byte pointers,
// i.e. they are in bytes. All filter and sub-pixel arguments are ignored.
template <int bitdepth, typename Pixel>
void ConvolveCopy_C(const void* const reference,
                    const ptrdiff_t reference_stride,
                    const int /*horizontal_filter_index*/,
                    const int /*vertical_filter_index*/,
                    const int /*inter_round_bits_vertical*/,
                    const int /*subpixel_x*/, const int /*subpixel_y*/,
                    const int /*step_x*/, const int /*step_y*/, const int width,
                    const int height, void* prediction,
                    const ptrdiff_t pred_stride) {
  const size_t row_bytes = width * sizeof(Pixel);
  const auto* src_row = static_cast<const uint8_t*>(reference);
  auto* dest_row = static_cast<uint8_t*>(prediction);
  int row = 0;
  do {
    memcpy(dest_row, src_row, row_bytes);
    dest_row += pred_stride;
    src_row += reference_stride;
  } while (++row < height);
}
| |
// Compound prediction without filtering: every source sample is shifted left
// by 4 bits and biased by (1 << (bitdepth + 4)) + (1 << (bitdepth + 3)), then
// stored as uint16_t.
//
// |reference_stride| is divided by sizeof(Pixel); |pred_stride| is applied
// directly as a uint16_t element stride. All filter and sub-pixel arguments
// are ignored.
template <int bitdepth, typename Pixel>
void ConvolveCompoundCopy_C(const void* const reference,
                            const ptrdiff_t reference_stride,
                            const int /*horizontal_filter_index*/,
                            const int /*vertical_filter_index*/,
                            const int /*inter_round_bits_vertical*/,
                            const int /*subpixel_x*/, const int /*subpixel_y*/,
                            const int /*step_x*/, const int /*step_y*/,
                            const int width, const int height, void* prediction,
                            const ptrdiff_t pred_stride) {
  constexpr int kCompoundRoundOffset =
      (1 << (bitdepth + 4)) + (1 << (bitdepth + 3));
  const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
  const auto* src_row = static_cast<const Pixel*>(reference);
  auto* dest_row = static_cast<uint16_t*>(prediction);
  int row = 0;
  do {
    int column = 0;
    do {
      const int scaled = src_row[column] << 4;
      dest_row[column] = scaled + kCompoundRoundOffset;
    } while (++column < width);

    dest_row += pred_stride;
    src_row += src_stride;
  } while (++row < height);
}
| |
| // This function is a simplified version of ConvolveCompound2D_C. |
| // It is called when it is compound prediction mode, where only horizontal |
| // filtering is required. |
| // The output is not clipped to valid pixel range. Its output will be |
| // blended with another predictor to generate the final prediction of the block. |
| template <int bitdepth, typename Pixel> |
| void ConvolveCompoundHorizontal_C( |
| const void* const reference, const ptrdiff_t reference_stride, |
| const int horizontal_filter_index, const int /*vertical_filter_index*/, |
| const int inter_round_bits_vertical, const int subpixel_x, |
| const int /*subpixel_y*/, const int /*step_x*/, const int /*step_y*/, |
| const int width, const int height, void* prediction, |
| const ptrdiff_t pred_stride) { |
| constexpr int kRoundBitsHorizontal = (bitdepth == 12) |
| ? kInterRoundBitsHorizontal12bpp |
| : kInterRoundBitsHorizontal; |
| const int filter_index = GetFilterIndex(horizontal_filter_index, width); |
| const auto* src = static_cast<const Pixel*>(reference) - kHorizontalOffset; |
| const ptrdiff_t src_stride = reference_stride / sizeof(Pixel); |
| auto* dest = static_cast<uint16_t*>(prediction); |
| const int filter_id = (subpixel_x >> 6) & kSubPixelMask; |
| const int bits_shift = kFilterBits - inter_round_bits_vertical; |
| const int compound_round_offset = |
| (1 << (bitdepth + 4)) + (1 << (bitdepth + 3)); |
| int y = 0; |
| do { |
| int x = 0; |
| do { |
| int sum = 0; |
| for (int k = 0; k < kSubPixelTaps; ++k) { |
| sum += kSubPixelFilters[filter_index][filter_id][k] * src[x + k]; |
| } |
| sum = RightShiftWithRounding(sum, kRoundBitsHorizontal) << bits_shift; |
| dest[x] = sum + compound_round_offset; |
| } while (++x < width); |
| |
| src += src_stride; |
| dest += pred_stride; |
| } while (++y < height); |
| } |
| |
| // This function is a simplified version of ConvolveCompound2D_C. |
| // It is called when it is compound prediction mode, where only vertical |
| // filtering is required. |
| // The output is not clipped to valid pixel range. Its output will be |
| // blended with another predictor to generate the final prediction of the block. |
| template <int bitdepth, typename Pixel> |
| void ConvolveCompoundVertical_C(const void* const reference, |
| const ptrdiff_t reference_stride, |
| const int /*horizontal_filter_index*/, |
| const int vertical_filter_index, |
| const int inter_round_bits_vertical, |
| const int /*subpixel_x*/, const int subpixel_y, |
| const int /*step_x*/, const int /*step_y*/, |
| const int width, const int height, |
| void* prediction, const ptrdiff_t pred_stride) { |
| constexpr int kRoundBitsHorizontal = (bitdepth == 12) |
| ? kInterRoundBitsHorizontal12bpp |
| : kInterRoundBitsHorizontal; |
| const int filter_index = GetFilterIndex(vertical_filter_index, height); |
| const ptrdiff_t src_stride = reference_stride / sizeof(Pixel); |
| const auto* src = |
| static_cast<const Pixel*>(reference) - kVerticalOffset * src_stride; |
| auto* dest = static_cast<uint16_t*>(prediction); |
| const int filter_id = (subpixel_y >> 6) & kSubPixelMask; |
| const int bits_shift = kFilterBits - kRoundBitsHorizontal; |
| const int compound_round_offset = |
| (1 << (bitdepth + 4)) + (1 << (bitdepth + 3)); |
| int y = 0; |
| do { |
| int x = 0; |
| do { |
| int sum = 0; |
| for (int k = 0; k < kSubPixelTaps; ++k) { |
| sum += kSubPixelFilters[filter_index][filter_id][k] * |
| src[k * src_stride + x]; |
| } |
| dest[x] = RightShiftWithRounding(LeftShift(sum, bits_shift), |
| inter_round_bits_vertical) + |
| compound_round_offset; |
| } while (++x < width); |
| |
| src += src_stride; |
| dest += pred_stride; |
| } while (++y < height); |
| } |
| |
| // This function is used when intra block copy is present. |
| // It is called when it is single prediction mode for U/V plane, where the |
| // reference block is from current frame and both horizontal and vertical |
| // filtering are required. |
| // The output is the single prediction of the block, clipped to valid pixel |
| // range. |
| template <int bitdepth, typename Pixel> |
| void ConvolveIntraBlockCopy2D_C( |
| const void* const reference, const ptrdiff_t reference_stride, |
| const int /*horizontal_filter_index*/, const int /*vertical_filter_index*/, |
| const int /*inter_round_bits_vertical*/, const int /*subpixel_x*/, |
| const int /*subpixel_y*/, const int /*step_x*/, const int /*step_y*/, |
| const int width, const int height, void* prediction, |
| const ptrdiff_t pred_stride) { |
| const auto* src = reinterpret_cast<const Pixel*>(reference); |
| const ptrdiff_t src_stride = reference_stride / sizeof(Pixel); |
| auto* dest = reinterpret_cast<Pixel*>(prediction); |
| const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel); |
| const int intermediate_height = height + 1; |
| uint16_t intermediate_result[kMaxSuperBlockSizeInPixels * |
| (kMaxSuperBlockSizeInPixels + 1)]; |
| uint16_t* intermediate = intermediate_result; |
| // Note: allow vertical access to height + 1. Because this function is only |
| // for u/v plane of intra block copy, such access is guaranteed to be within |
| // the prediction block. |
| int y = 0; |
| do { |
| int x = 0; |
| do { |
| intermediate[x] = src[x] + src[x + 1]; |
| } while (++x < width); |
| |
| src += src_stride; |
| intermediate += width; |
| } while (++y < intermediate_height); |
| |
| intermediate = intermediate_result; |
| y = 0; |
| do { |
| int x = 0; |
| do { |
| dest[x] = static_cast<Pixel>( |
| RightShiftWithRounding(intermediate[x] + intermediate[x + width], 2)); |
| } while (++x < width); |
| |
| intermediate += width; |
| dest += dest_stride; |
| } while (++y < height); |
| } |
| |
// Intra block copy filtering in one direction for single prediction.
//
// Each output sample is the rounded average of the current sample and its
// neighbour: the next sample in the row when |is_horizontal| is true,
// otherwise the sample one row below. Both strides are divided by
// sizeof(Pixel). All filter and sub-pixel arguments are ignored.
template <int bitdepth, typename Pixel, bool is_horizontal>
void ConvolveIntraBlockCopy1D_C(
    const void* const reference, const ptrdiff_t reference_stride,
    const int /*horizontal_filter_index*/, const int /*vertical_filter_index*/,
    const int /*inter_round_bits_vertical*/, const int /*subpixel_x*/,
    const int /*subpixel_y*/, const int /*step_x*/, const int /*step_y*/,
    const int width, const int height, void* prediction,
    const ptrdiff_t pred_stride) {
  const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
  const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
  // Distance, in samples, from a sample to the neighbour it is averaged with.
  const ptrdiff_t neighbour = is_horizontal ? 1 : src_stride;
  const auto* src_row = static_cast<const Pixel*>(reference);
  auto* dest_row = static_cast<Pixel*>(prediction);

  int row = 0;
  do {
    int column = 0;
    do {
      dest_row[column] = static_cast<Pixel>(RightShiftWithRounding(
          src_row[column] + src_row[column + neighbour], 1));
    } while (++column < width);

    dest_row += dest_stride;
    src_row += src_stride;
  } while (++row < height);
}
| |
| void Init8bpp() { |
| Dsp* const dsp = dsp_internal::GetWritableDspTable(8); |
| assert(dsp != nullptr); |
| #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS |
| dsp->convolve[0][0][0][0] = ConvolveCopy_C<8, uint8_t>; |
| dsp->convolve[0][0][0][1] = ConvolveHorizontal_C<8, uint8_t>; |
| dsp->convolve[0][0][1][0] = ConvolveVertical_C<8, uint8_t>; |
| dsp->convolve[0][0][1][1] = Convolve2D_C<8, uint8_t>; |
| |
| dsp->convolve[0][1][0][0] = ConvolveCompoundCopy_C<8, uint8_t>; |
| dsp->convolve[0][1][0][1] = ConvolveCompoundHorizontal_C<8, uint8_t>; |
| dsp->convolve[0][1][1][0] = ConvolveCompoundVertical_C<8, uint8_t>; |
| dsp->convolve[0][1][1][1] = ConvolveCompound2D_C<8, uint8_t>; |
| |
| dsp->convolve[1][0][0][0] = ConvolveCopy_C<8, uint8_t>; |
| dsp->convolve[1][0][0][1] = |
| ConvolveIntraBlockCopy1D_C<8, uint8_t, /*is_horizontal=*/true>; |
| dsp->convolve[1][0][1][0] = |
| ConvolveIntraBlockCopy1D_C<8, uint8_t, /*is_horizontal=*/false>; |
| dsp->convolve[1][0][1][1] = ConvolveIntraBlockCopy2D_C<8, uint8_t>; |
| |
| dsp->convolve[1][1][0][0] = nullptr; |
| dsp->convolve[1][1][0][1] = nullptr; |
| dsp->convolve[1][1][1][0] = nullptr; |
| dsp->convolve[1][1][1][1] = nullptr; |
| |
| dsp->convolve_scale[0] = ConvolveScale2D_C<8, uint8_t>; |
| dsp->convolve_scale[1] = ConvolveCompoundScale2D_C<8, uint8_t>; |
| #else // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS |
| #ifndef LIBGAV1_Dsp8bpp_ConvolveCopy |
| dsp->convolve[0][0][0][0] = ConvolveCopy_C<8, uint8_t>; |
| #endif |
| #ifndef LIBGAV1_Dsp8bpp_ConvolveHorizontal |
| dsp->convolve[0][0][0][1] = ConvolveHorizontal_C<8, uint8_t>; |
| #endif |
| #ifndef LIBGAV1_Dsp8bpp_ConvolveVertical |
| dsp->convolve[0][0][1][0] = ConvolveVertical_C<8, uint8_t>; |
| #endif |
| #ifndef LIBGAV1_Dsp8bpp_Convolve2D |
| dsp->convolve[0][0][1][1] = Convolve2D_C<8, uint8_t>; |
| #endif |
| |
| #ifndef LIBGAV1_Dsp8bpp_ConvolveCompoundCopy |
| dsp->convolve[0][1][0][0] = ConvolveCompoundCopy_C<8, uint8_t>; |
| #endif |
| #ifndef LIBGAV1_Dsp8bpp_ConvolveCompoundHorizontal |
| dsp->convolve[0][1][0][1] = ConvolveCompoundHorizontal_C<8, uint8_t>; |
| #endif |
| #ifndef LIBGAV1_Dsp8bpp_ConvolveCompoundVertical |
| dsp->convolve[0][1][1][0] = ConvolveCompoundVertical_C<8, uint8_t>; |
| #endif |
| #ifndef LIBGAV1_Dsp8bpp_ConvolveCompound2D |
| dsp->convolve[0][1][1][1] = ConvolveCompound2D_C<8, uint8_t>; |
| #endif |
| |
| #ifndef LIBGAV1_Dsp8bpp_ConvolveIntraBlockCopy |
| dsp->convolve[1][0][0][0] = ConvolveCopy_C<8, uint8_t>; |
| #endif |
| #ifndef LIBGAV1_Dsp8bpp_ConvolveIntraBlockCopyHorizontal |
| dsp->convolve[1][0][0][1] = |
| ConvolveIntraBlockCopy1D_C<8, uint8_t, /*is_horizontal=*/true>; |
| #endif |
| #ifndef LIBGAV1_Dsp8bpp_ConvolveIntraBlockCopyVertical |
| dsp->convolve[1][0][1][0] = |
| ConvolveIntraBlockCopy1D_C<8, uint8_t, /*is_horizontal=*/false>; |
| #endif |
| #ifndef LIBGAV1_Dsp8bpp_ConvolveIntraBlockCopy2D |
| dsp->convolve[1][0][1][1] = ConvolveIntraBlockCopy2D_C<8, uint8_t>; |
| #endif |
| |
| dsp->convolve[1][1][0][0] = nullptr; |
| dsp->convolve[1][1][0][1] = nullptr; |
| dsp->convolve[1][1][1][0] = nullptr; |
| dsp->convolve[1][1][1][1] = nullptr; |
| |
| #ifndef LIBGAV1_Dsp8bpp_ConvolveScale2D |
| dsp->convolve_scale[0] = ConvolveScale2D_C<8, uint8_t>; |
| #endif |
| #ifndef LIBGAV1_Dsp8bpp_ConvolveCompoundScale2D |
| dsp->convolve_scale[1] = ConvolveCompoundScale2D_C<8, uint8_t>; |
| #endif |
| #endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS |
| } |
| |
#if LIBGAV1_MAX_BITDEPTH >= 10
// Installs the C convolve functions into the 10-bit Dsp table.
//
// Judging by the functions stored below, the convolve table is indexed as
// [intra block copy][compound][vertical filter][horizontal filter]. With
// LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS every entry gets its C implementation;
// otherwise an entry is only set when its LIBGAV1_Dsp10bpp_* macro is not
// defined.
void Init10bpp() {
  Dsp* const dsp = dsp_internal::GetWritableDspTable(10);
  assert(dsp != nullptr);

  // The [1][1] entries are cleared in every build configuration.
  dsp->convolve[1][1][0][0] = nullptr;
  dsp->convolve[1][1][0][1] = nullptr;
  dsp->convolve[1][1][1][0] = nullptr;
  dsp->convolve[1][1][1][1] = nullptr;

#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
  // Single prediction.
  dsp->convolve[0][0][0][0] = ConvolveCopy_C<10, uint16_t>;
  dsp->convolve[0][0][0][1] = ConvolveHorizontal_C<10, uint16_t>;
  dsp->convolve[0][0][1][0] = ConvolveVertical_C<10, uint16_t>;
  dsp->convolve[0][0][1][1] = Convolve2D_C<10, uint16_t>;

  // Compound prediction.
  dsp->convolve[0][1][0][0] = ConvolveCompoundCopy_C<10, uint16_t>;
  dsp->convolve[0][1][0][1] = ConvolveCompoundHorizontal_C<10, uint16_t>;
  dsp->convolve[0][1][1][0] = ConvolveCompoundVertical_C<10, uint16_t>;
  dsp->convolve[0][1][1][1] = ConvolveCompound2D_C<10, uint16_t>;

  // Intra block copy.
  dsp->convolve[1][0][0][0] = ConvolveCopy_C<10, uint16_t>;
  dsp->convolve[1][0][0][1] =
      ConvolveIntraBlockCopy1D_C<10, uint16_t, /*is_horizontal=*/true>;
  dsp->convolve[1][0][1][0] =
      ConvolveIntraBlockCopy1D_C<10, uint16_t, /*is_horizontal=*/false>;
  dsp->convolve[1][0][1][1] = ConvolveIntraBlockCopy2D_C<10, uint16_t>;

  // Scaled prediction.
  dsp->convolve_scale[0] = ConvolveScale2D_C<10, uint16_t>;
  dsp->convolve_scale[1] = ConvolveCompoundScale2D_C<10, uint16_t>;
#else  // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
  // Single prediction.
#if !defined(LIBGAV1_Dsp10bpp_ConvolveCopy)
  dsp->convolve[0][0][0][0] = ConvolveCopy_C<10, uint16_t>;
#endif
#if !defined(LIBGAV1_Dsp10bpp_ConvolveHorizontal)
  dsp->convolve[0][0][0][1] = ConvolveHorizontal_C<10, uint16_t>;
#endif
#if !defined(LIBGAV1_Dsp10bpp_ConvolveVertical)
  dsp->convolve[0][0][1][0] = ConvolveVertical_C<10, uint16_t>;
#endif
#if !defined(LIBGAV1_Dsp10bpp_Convolve2D)
  dsp->convolve[0][0][1][1] = Convolve2D_C<10, uint16_t>;
#endif

  // Compound prediction.
#if !defined(LIBGAV1_Dsp10bpp_ConvolveCompoundCopy)
  dsp->convolve[0][1][0][0] = ConvolveCompoundCopy_C<10, uint16_t>;
#endif
#if !defined(LIBGAV1_Dsp10bpp_ConvolveCompoundHorizontal)
  dsp->convolve[0][1][0][1] = ConvolveCompoundHorizontal_C<10, uint16_t>;
#endif
#if !defined(LIBGAV1_Dsp10bpp_ConvolveCompoundVertical)
  dsp->convolve[0][1][1][0] = ConvolveCompoundVertical_C<10, uint16_t>;
#endif
#if !defined(LIBGAV1_Dsp10bpp_ConvolveCompound2D)
  dsp->convolve[0][1][1][1] = ConvolveCompound2D_C<10, uint16_t>;
#endif

  // Intra block copy.
  // NOTE(review): the three macros after ConvolveIntraBlockCopy are spelled
  // "IntraBlock..." here but "IntraBlockCopy..." in the 8bpp table -- confirm
  // which spelling the headers that define them use.
#if !defined(LIBGAV1_Dsp10bpp_ConvolveIntraBlockCopy)
  dsp->convolve[1][0][0][0] = ConvolveCopy_C<10, uint16_t>;
#endif
#if !defined(LIBGAV1_Dsp10bpp_ConvolveIntraBlockHorizontal)
  dsp->convolve[1][0][0][1] =
      ConvolveIntraBlockCopy1D_C<10, uint16_t, /*is_horizontal=*/true>;
#endif
#if !defined(LIBGAV1_Dsp10bpp_ConvolveIntraBlockVertical)
  dsp->convolve[1][0][1][0] =
      ConvolveIntraBlockCopy1D_C<10, uint16_t, /*is_horizontal=*/false>;
#endif
#if !defined(LIBGAV1_Dsp10bpp_ConvolveIntraBlock2D)
  dsp->convolve[1][0][1][1] = ConvolveIntraBlockCopy2D_C<10, uint16_t>;
#endif

  // Scaled prediction.
#if !defined(LIBGAV1_Dsp10bpp_ConvolveScale2D)
  dsp->convolve_scale[0] = ConvolveScale2D_C<10, uint16_t>;
#endif
#if !defined(LIBGAV1_Dsp10bpp_ConvolveCompoundScale2D)
  dsp->convolve_scale[1] = ConvolveCompoundScale2D_C<10, uint16_t>;
#endif
#endif  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
}
#endif  // LIBGAV1_MAX_BITDEPTH >= 10
| |
| } // namespace |
| |
// Registers the C convolve implementations: always for 8-bit, and also for
// 10-bit when LIBGAV1_MAX_BITDEPTH is at least 10.
void ConvolveInit_C() {
  Init8bpp();
#if LIBGAV1_MAX_BITDEPTH >= 10
  Init10bpp();
#endif
}
| |
| } // namespace dsp |
| } // namespace libgav1 |