// blob: 0fdebb71ebfdadf5a4ca3d5568779ac49bbe2de9 [file] [log] [blame]
#include "src/dsp/convolve.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include "src/dsp/constants.h"
#include "src/dsp/dsp.h"
#include "src/utils/common.h"
#include "src/utils/constants.h"
namespace libgav1 {
namespace dsp {
namespace {
// Mask that keeps the low kSubPixelBits bits of a value. The convolve
// functions below apply it to a shifted sub-pixel position to obtain the
// filter id used to index kSubPixelFilters.
constexpr int kSubPixelMask = (1 << kSubPixelBits) - 1;
// Number of columns the source pointer is moved to the left before the
// horizontal filter is applied in the unscaled convolve functions.
constexpr int kHorizontalOffset = 3;
// Number of rows the source pointer is moved up before the vertical filter is
// applied in the unscaled convolve functions.
constexpr int kVerticalOffset = 3;
// Selects the filter set to use for a block dimension of |length| pixels.
// For |length| <= 4 the eight-tap and eight-tap-sharp filters map to index 4
// and the eight-tap-smooth filter maps to index 5. Every other combination
// returns |filter_index| unchanged. Callers in this file use the result as the
// first index into kSubPixelFilters.
int GetFilterIndex(const int filter_index, const int length) {
  // Dimensions larger than 4 always keep the requested filter.
  if (length > 4) return filter_index;
  const bool is_regular_or_sharp =
      filter_index == kInterpolationFilterEightTap ||
      filter_index == kInterpolationFilterEightTapSharp;
  if (is_regular_or_sharp) return 4;
  return (filter_index == kInterpolationFilterEightTapSmooth) ? 5
                                                              : filter_index;
}
// Scaled two-pass convolution for single prediction.
// The horizontal pass filters the source into a 16-bit intermediate buffer,
// advancing the sub-pixel position by |step_x| per output column. The vertical
// pass then filters the intermediate rows, advancing by |step_y| per output
// row, subtracts the rounding offset and clips the result to
// [0, (1 << bitdepth) - 1] before storing Pixel values into |prediction|.
// |reference_stride| and |pred_stride| are divided by sizeof(Pixel) before
// being used as element strides.
// NOTE(review): the literals 6 and 1023 below presumably equal
// kScaleSubPixelBits - kSubPixelBits and (1 << kScaleSubPixelBits) - 1;
// confirm against the definitions of those constants.
template <int bitdepth, typename Pixel>
void Convolve2DScaleSingle_C(
    const void* const reference, const ptrdiff_t reference_stride,
    const int horizontal_filter_index, const int vertical_filter_index,
    const uint8_t inter_round_bits[2], const int subpixel_x,
    const int subpixel_y, const int step_x, const int step_y, const int width,
    const int height, void* prediction, const ptrdiff_t pred_stride) {
  // Rows required by the vertical pass: the scaled position of the last output
  // row, rounded up to a whole row, plus one row per filter tap.
  const int last_row_position = (height - 1) * step_y;
  const int intermediate_height =
      ((last_row_position + (1 << kScaleSubPixelBits) - 1) >>
       kScaleSubPixelBits) +
      kSubPixelTaps;
  // The output of the horizontal filter, i.e. the intermediate_result, is
  // guaranteed to fit in int16_t.
  int16_t intermediate_result[kMaxSuperBlockSizeInPixels *
                              (2 * kMaxSuperBlockSizeInPixels + 8)];
  const int row_pitch = kMaxSuperBlockSizeInPixels;

  // Horizontal pass. GetFilterIndex() substitutes the filter set when the
  // dimension is <= 4.
  // Note: assume the input src is already aligned to the correct start
  // position.
  const int horizontal_set = GetFilterIndex(horizontal_filter_index, width);
  const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
  const auto* src_row = static_cast<const Pixel*>(reference);
  const int base_x = subpixel_x >> kScaleSubPixelBits;
  int16_t* row_out = intermediate_result;
  for (int row = 0; row < intermediate_height; ++row) {
    int position = subpixel_x;
    for (int col = 0; col < width; ++col) {
      const Pixel* const window =
          &src_row[(position >> kScaleSubPixelBits) - base_x];
      const int filter_id = (position >> 6) & kSubPixelMask;
      // An offset to guarantee the sum is non negative.
      int sum = 1 << (bitdepth + kFilterBits - 1);
      for (int tap = 0; tap < kSubPixelTaps; ++tap) {
        sum += kSubPixelFilters[horizontal_set][filter_id][tap] * window[tap];
      }
      assert(sum >= 0 && sum < (1 << (bitdepth + kFilterBits + 1)));
      row_out[col] = static_cast<int16_t>(
          RightShiftWithRounding(sum, inter_round_bits[0]));
      position += step_x;
    }
    src_row += src_stride;
    row_out += row_pitch;
  }

  // Vertical pass.
  const int vertical_set = GetFilterIndex(vertical_filter_index, height);
  const int offset_bits = bitdepth + 2 * kFilterBits - inter_round_bits[0];
  const int single_round_offset = (1 << bitdepth) + (1 << (bitdepth - 1));
  const int max_pixel_value = (1 << bitdepth) - 1;
  const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
  auto* dest_row = static_cast<Pixel*>(prediction);
  int position = subpixel_y & 1023;
  for (int row = 0; row < height; ++row) {
    const int filter_id = (position >> 6) & kSubPixelMask;
    // First intermediate row covered by the filter for this output row.
    const int first_row = position >> kScaleSubPixelBits;
    for (int col = 0; col < width; ++col) {
      // An offset to guarantee the sum is non negative.
      int sum = 1 << offset_bits;
      for (int tap = 0; tap < kSubPixelTaps; ++tap) {
        sum += kSubPixelFilters[vertical_set][filter_id][tap] *
               intermediate_result[(first_row + tap) * row_pitch + col];
      }
      assert(sum >= 0 && sum < (1 << (offset_bits + 2)));
      dest_row[col] = static_cast<Pixel>(
          Clip3(RightShiftWithRounding(sum, inter_round_bits[1]) -
                    single_round_offset,
                0, max_pixel_value));
    }
    dest_row += dest_stride;
    position += step_y;
  }
}
// Scaled two-pass convolution whose output is stored as unclipped uint16_t
// values.
// The horizontal pass filters the source into a 16-bit intermediate buffer,
// advancing the sub-pixel position by |step_x| per output column. The vertical
// pass filters the intermediate rows, advancing by |step_y| per output row,
// and stores the rounded sums (rounding offset still included) into
// |prediction|.
// |reference_stride| is divided by sizeof(Pixel) before use, while
// |pred_stride| is applied directly to the uint16_t destination pointer.
// NOTE(review): the literals 6 and 1023 below presumably equal
// kScaleSubPixelBits - kSubPixelBits and (1 << kScaleSubPixelBits) - 1;
// confirm against the definitions of those constants.
template <int bitdepth, typename Pixel>
void Convolve2DScale_C(const void* const reference,
                       const ptrdiff_t reference_stride,
                       const int horizontal_filter_index,
                       const int vertical_filter_index,
                       const uint8_t inter_round_bits[2], const int subpixel_x,
                       const int subpixel_y, const int step_x, const int step_y,
                       const int width, const int height,
                       void* prediction, const ptrdiff_t pred_stride) {
  // Rows required by the vertical pass: the scaled position of the last output
  // row, rounded up to a whole row, plus one row per filter tap.
  const int last_row_position = (height - 1) * step_y;
  const int intermediate_height =
      ((last_row_position + (1 << kScaleSubPixelBits) - 1) >>
       kScaleSubPixelBits) +
      kSubPixelTaps;
  // The output of the horizontal filter, i.e. the intermediate_result, is
  // guaranteed to fit in int16_t.
  int16_t intermediate_result[kMaxSuperBlockSizeInPixels *
                              (2 * kMaxSuperBlockSizeInPixels + 8)];
  const int row_pitch = kMaxSuperBlockSizeInPixels;

  // Horizontal pass. GetFilterIndex() substitutes the filter set when the
  // dimension is <= 4.
  // Note: assume the input src is already aligned to the correct start
  // position.
  const int horizontal_set = GetFilterIndex(horizontal_filter_index, width);
  const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
  const auto* src_row = static_cast<const Pixel*>(reference);
  const int base_x = subpixel_x >> kScaleSubPixelBits;
  int16_t* row_out = intermediate_result;
  for (int row = 0; row < intermediate_height; ++row) {
    int position = subpixel_x;
    for (int col = 0; col < width; ++col) {
      const Pixel* const window =
          &src_row[(position >> kScaleSubPixelBits) - base_x];
      const int filter_id = (position >> 6) & kSubPixelMask;
      // An offset to guarantee the sum is non negative.
      int sum = 1 << (bitdepth + kFilterBits - 1);
      for (int tap = 0; tap < kSubPixelTaps; ++tap) {
        sum += kSubPixelFilters[horizontal_set][filter_id][tap] * window[tap];
      }
      assert(sum >= 0 && sum < (1 << (bitdepth + kFilterBits + 1)));
      row_out[col] = static_cast<int16_t>(
          RightShiftWithRounding(sum, inter_round_bits[0]));
      position += step_x;
    }
    src_row += src_stride;
    row_out += row_pitch;
  }

  // Vertical pass.
  const int vertical_set = GetFilterIndex(vertical_filter_index, height);
  const int offset_bits = bitdepth + 2 * kFilterBits - inter_round_bits[0];
  auto* dest_row = static_cast<uint16_t*>(prediction);
  int position = subpixel_y & 1023;
  for (int row = 0; row < height; ++row) {
    const int filter_id = (position >> 6) & kSubPixelMask;
    // First intermediate row covered by the filter for this output row.
    const int first_row = position >> kScaleSubPixelBits;
    for (int col = 0; col < width; ++col) {
      // An offset to guarantee the sum is non negative.
      int sum = 1 << offset_bits;
      for (int tap = 0; tap < kSubPixelTaps; ++tap) {
        sum += kSubPixelFilters[vertical_set][filter_id][tap] *
               intermediate_result[(first_row + tap) * row_pitch + col];
      }
      assert(sum >= 0 && sum < (1 << (offset_bits + 2)));
      dest_row[col] = static_cast<uint16_t>(
          RightShiftWithRounding(sum, inter_round_bits[1]));
    }
    dest_row += pred_stride;
    position += step_y;
  }
}
template <int bitdepth, typename Pixel>
void Convolve2D_C(const void* const reference, const ptrdiff_t reference_stride,
const int horizontal_filter_index,
const int vertical_filter_index,
const uint8_t inter_round_bits[2], const int subpixel_x,
const int subpixel_y, const int /*step_x*/,
const int /*step_y*/, const int width, const int height,
void* prediction, const ptrdiff_t pred_stride) {
const int intermediate_height = height + kSubPixelTaps - 1;
// The output of the horizontal filter, i.e. the intermediate_result, is
// guaranteed to fit in int16_t.
int16_t intermediate_result[kMaxSuperBlockSizeInPixels *
(2 * kMaxSuperBlockSizeInPixels + 8)];
const int intermediate_stride = kMaxSuperBlockSizeInPixels;
// Horizontal filter.
// Filter types used for width <= 4 are different from those for width > 4.
// When width > 4, the valid filter index range is always [0, 3].
// When width <= 4, the valid filter index range is always [4, 5].
// Similarly for height.
int filter_index = GetFilterIndex(horizontal_filter_index, width);
int16_t* intermediate = intermediate_result;
const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
const auto* src = static_cast<const Pixel*>(reference) -
kVerticalOffset * src_stride - kHorizontalOffset;
auto* dest = static_cast<uint16_t*>(prediction);
int filter_id = (subpixel_x >> 6) & kSubPixelMask;
for (int y = 0; y < intermediate_height; ++y) {
for (int x = 0; x < width; ++x) {
// An offset to guarantee the sum is non negative.
int sum = 1 << (bitdepth + kFilterBits - 1);
for (int k = 0; k < kSubPixelTaps; ++k) {
sum += kSubPixelFilters[filter_index][filter_id][k] * src[x + k];
}
assert(sum >= 0 && sum < (1 << (bitdepth + kFilterBits + 1)));
intermediate[x] = static_cast<int16_t>(
RightShiftWithRounding(sum, inter_round_bits[0]));
}
src += src_stride;
intermediate += intermediate_stride;
}
// Vertical filter.
filter_index = GetFilterIndex(vertical_filter_index, height);
intermediate = intermediate_result;
filter_id = ((subpixel_y & 1023) >> 6) & kSubPixelMask;
const int offset_bits = bitdepth + 2 * kFilterBits - inter_round_bits[0];
for (int y = 0; y < height; ++y) {
for (int x = 0; x < width; ++x) {
// An offset to guarantee the sum is non negative.
int sum = 1 << offset_bits;
for (int k = 0; k < kSubPixelTaps; ++k) {
sum += kSubPixelFilters[filter_index][filter_id][k] *
intermediate[k * intermediate_stride + x];
}
assert(sum >= 0 && sum < (1 << (offset_bits + 2)));
dest[x] = static_cast<uint16_t>(
RightShiftWithRounding(sum, inter_round_bits[1]));
}
dest += pred_stride;
intermediate += intermediate_stride;
}
}
// This function is a simplified version of Convolve2D_C.
// It is called when it is single prediction mode, where only horizontal
// filtering is required.
// The output is the single prediction of the block, clipped to valid pixel
// range.
template <int bitdepth, typename Pixel>
void Convolve2DSingle_C(const void* const reference,
const ptrdiff_t reference_stride,
const int horizontal_filter_index,
const int vertical_filter_index,
const uint8_t inter_round_bits[2], const int subpixel_x,
const int subpixel_y, const int /*step_x*/,
const int /*step_y*/, const int width, const int height,
void* prediction, const ptrdiff_t pred_stride) {
const int intermediate_height = height + kSubPixelTaps - 1;
// The output of the horizontal filter, i.e. the intermediate_result, is
// guaranteed to fit in int16_t.
int16_t intermediate_result[kMaxSuperBlockSizeInPixels *
(2 * kMaxSuperBlockSizeInPixels + 8)];
const int intermediate_stride = kMaxSuperBlockSizeInPixels;
const int single_round_offset = (1 << bitdepth) + (1 << (bitdepth - 1));
const int max_pixel_value = (1 << bitdepth) - 1;
// Horizontal filter.
// Filter types used for width <= 4 are different from those for width > 4.
// When width > 4, the valid filter index range is always [0, 3].
// When width <= 4, the valid filter index range is always [4, 5].
// Similarly for height.
int filter_index = GetFilterIndex(horizontal_filter_index, width);
int16_t* intermediate = intermediate_result;
const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
const auto* src = static_cast<const Pixel*>(reference) -
kVerticalOffset * src_stride - kHorizontalOffset;
auto* dest = static_cast<Pixel*>(prediction);
const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
int filter_id = (subpixel_x >> 6) & kSubPixelMask;
for (int y = 0; y < intermediate_height; ++y) {
for (int x = 0; x < width; ++x) {
// An offset to guarantee the sum is non negative.
int sum = 1 << (bitdepth + kFilterBits - 1);
for (int k = 0; k < kSubPixelTaps; ++k) {
sum += kSubPixelFilters[filter_index][filter_id][k] * src[x + k];
}
assert(sum >= 0 && sum < (1 << (bitdepth + kFilterBits + 1)));
intermediate[x] = static_cast<int16_t>(
RightShiftWithRounding(sum, inter_round_bits[0]));
}
src += src_stride;
intermediate += intermediate_stride;
}
// Vertical filter.
filter_index = GetFilterIndex(vertical_filter_index, height);
intermediate = intermediate_result;
filter_id = ((subpixel_y & 1023) >> 6) & kSubPixelMask;
const int offset_bits = bitdepth + 2 * kFilterBits - inter_round_bits[0];
for (int y = 0; y < height; ++y) {
for (int x = 0; x < width; ++x) {
// An offset to guarantee the sum is non negative.
int sum = 1 << offset_bits;
for (int k = 0; k < kSubPixelTaps; ++k) {
sum += kSubPixelFilters[filter_index][filter_id][k] *
intermediate[k * intermediate_stride + x];
}
assert(sum >= 0 && sum < (1 << (offset_bits + 2)));
dest[x] = static_cast<Pixel>(
Clip3(RightShiftWithRounding(sum, inter_round_bits[1]) -
single_round_offset,
0, max_pixel_value));
}
dest += dest_stride;
intermediate += intermediate_stride;
}
}
// Horizontal-only counterpart of Convolve2D_C, used for single prediction
// when no vertical filtering is needed.
// Every output pixel is the kSubPixelTaps-tap filtered sum of the source
// pixels starting kHorizontalOffset columns to the left. The sum is rounded
// in two stages (by inter_round_bits[0], then by
// kFilterBits - inter_round_bits[0]) and clipped to
// [0, (1 << bitdepth) - 1].
// |reference_stride| and |pred_stride| are divided by sizeof(Pixel) before
// being used as element strides.
template <int bitdepth, typename Pixel>
void ConvolveHorizontal_C(const void* const reference,
                          const ptrdiff_t reference_stride,
                          const int horizontal_filter_index,
                          const int /*vertical_filter_index*/,
                          const uint8_t inter_round_bits[2],
                          const int subpixel_x, const int /*subpixel_y*/,
                          const int /*step_x*/, const int /*step_y*/,
                          const int width, const int height, void* prediction,
                          const ptrdiff_t pred_stride) {
  const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
  const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
  const auto* src_row =
      static_cast<const Pixel*>(reference) - kHorizontalOffset;
  auto* dest_row = static_cast<Pixel*>(prediction);
  const int filter_set = GetFilterIndex(horizontal_filter_index, width);
  const int filter_id = (subpixel_x >> 6) & kSubPixelMask;
  // Bits still to be removed after the first rounding stage.
  const int second_round_bits = kFilterBits - inter_round_bits[0];
  const int max_pixel_value = (1 << bitdepth) - 1;
  for (int row = 0; row < height; ++row) {
    for (int col = 0; col < width; ++col) {
      int sum = 0;
      for (int tap = 0; tap < kSubPixelTaps; ++tap) {
        sum += kSubPixelFilters[filter_set][filter_id][tap] *
               src_row[col + tap];
      }
      const int rounded_once =
          RightShiftWithRounding(sum, inter_round_bits[0]);
      dest_row[col] = static_cast<Pixel>(
          Clip3(RightShiftWithRounding(rounded_once, second_round_bits), 0,
                max_pixel_value));
    }
    src_row += src_stride;
    dest_row += dest_stride;
  }
}
// This function is a simplified version of Convolve2D_C.
// It is called when it is single prediction mode, where only vertical
// filtering is required.
// The output is the single prediction of the block, clipped to valid pixel
// range.
template <int bitdepth, typename Pixel>
void ConvolveVertical_C(const void* const reference,
const ptrdiff_t reference_stride,
const int /*horizontal_filter_index*/,
const int vertical_filter_index,
const uint8_t /*inter_round_bits*/[2],
const int /*subpixel_x*/, const int subpixel_y,
const int /*step_x*/, const int /*step_y*/,
const int width, const int height, void* prediction,
const ptrdiff_t pred_stride) {
const int filter_index = GetFilterIndex(vertical_filter_index, height);
const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
const auto* src =
static_cast<const Pixel*>(reference) - kVerticalOffset * src_stride;
auto* dest = static_cast<Pixel*>(prediction);
const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
const int filter_id = (subpixel_y >> 6) & kSubPixelMask;
const int max_pixel_value = (1 << bitdepth) - 1;
for (int y = 0; y < height; ++y) {
for (int x = 0; x < width; ++x) {
int sum = 0;
for (int k = 0; k < kSubPixelTaps; ++k) {
sum += kSubPixelFilters[filter_index][filter_id][k] *
src[k * src_stride + x];
}
dest[x] = static_cast<Pixel>(
Clip3(RightShiftWithRounding(sum, kFilterBits), 0, max_pixel_value));
}
src += src_stride;
dest += dest_stride;
}
}
// Copies a block of |width| x |height| pixels from |reference| to
// |prediction| without any filtering.
// Both buffers are walked as raw bytes: |reference_stride| and |pred_stride|
// are added to uint8_t pointers as-is, and each row copies
// width * sizeof(Pixel) bytes.
template <int bitdepth, typename Pixel>
void ConvolveCopy_C(const void* const reference,
                    const ptrdiff_t reference_stride,
                    const int /*horizontal_filter_index*/,
                    const int /*vertical_filter_index*/,
                    const uint8_t /*inter_round_bits*/[2],
                    const int /*subpixel_x*/, const int /*subpixel_y*/,
                    const int /*step_x*/, const int /*step_y*/, const int width,
                    const int height, void* prediction,
                    const ptrdiff_t pred_stride) {
  const size_t row_bytes = width * sizeof(Pixel);
  const auto* src_row = static_cast<const uint8_t*>(reference);
  auto* dest_row = static_cast<uint8_t*>(prediction);
  for (int rows_left = height; rows_left > 0; --rows_left) {
    memcpy(dest_row, src_row, row_bytes);
    src_row += reference_stride;
    dest_row += pred_stride;
  }
}
// Unfiltered copy that writes uint16_t output values.
// Each source pixel is shifted left by 4 and biased by
// (1 << (bitdepth + 4)) + (1 << (bitdepth + 3)) before being stored.
// |reference_stride| is divided by sizeof(Pixel) before use, while
// |pred_stride| is applied directly to the uint16_t destination pointer.
template <int bitdepth, typename Pixel>
void ConvolveCompoundCopy_C(const void* const reference,
                            const ptrdiff_t reference_stride,
                            const int /*horizontal_filter_index*/,
                            const int /*vertical_filter_index*/,
                            const uint8_t /*inter_round_bits*/[2],
                            const int /*subpixel_x*/, const int /*subpixel_y*/,
                            const int /*step_x*/, const int /*step_y*/,
                            const int width, const int height, void* prediction,
                            const ptrdiff_t pred_stride) {
  constexpr int kCompoundRoundOffset =
      (1 << (bitdepth + 4)) + (1 << (bitdepth + 3));
  const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
  const auto* src_row = static_cast<const Pixel*>(reference);
  auto* dest_row = static_cast<uint16_t*>(prediction);
  for (int rows_left = height; rows_left > 0; --rows_left) {
    for (int col = 0; col < width; ++col) {
      dest_row[col] =
          static_cast<uint16_t>((src_row[col] << 4) + kCompoundRoundOffset);
    }
    src_row += src_stride;
    dest_row += pred_stride;
  }
}
// This function is a simplified version of Convolve2D_C.
// It is called when it is compound prediction mode, where only horizontal
// filtering is required.
// The output is not clipped to valid pixel range. Its output will be
// blended with another predictor to generate the final prediction of the block.
//
// Every output value is the kSubPixelTaps-tap filtered sum of the source
// pixels starting kHorizontalOffset columns to the left, rounded by
// inter_round_bits[0], scaled up by kFilterBits - inter_round_bits[1] bits and
// biased by (1 << (bitdepth + 4)) + (1 << (bitdepth + 3)).
// |reference_stride| is divided by sizeof(Pixel) before use, while
// |pred_stride| is applied directly to the uint16_t destination pointer.
template <int bitdepth, typename Pixel>
void ConvolveCompoundHorizontal_C(
    const void* const reference, const ptrdiff_t reference_stride,
    const int horizontal_filter_index, const int /*vertical_filter_index*/,
    const uint8_t inter_round_bits[2], const int subpixel_x,
    const int /*subpixel_y*/, const int /*step_x*/, const int /*step_y*/,
    const int width, const int height, void* prediction,
    const ptrdiff_t pred_stride) {
  const int filter_index = GetFilterIndex(horizontal_filter_index, width);
  const auto* src = static_cast<const Pixel*>(reference) - kHorizontalOffset;
  const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
  auto* dest = static_cast<uint16_t*>(prediction);
  const int filter_id = (subpixel_x >> 6) & kSubPixelMask;
  const int bits_shift = kFilterBits - inter_round_bits[1];
  const int compound_round_offset =
      (1 << (bitdepth + 4)) + (1 << (bitdepth + 3));
  for (int y = 0; y < height; ++y) {
    for (int x = 0; x < width; ++x) {
      int sum = 0;
      for (int k = 0; k < kSubPixelTaps; ++k) {
        sum += kSubPixelFilters[filter_index][filter_id][k] * src[x + k];
      }
      const int rounded = RightShiftWithRounding(sum, inter_round_bits[0]);
      // No positive offset is added to |sum| here, so |rounded| can be
      // negative. Left-shifting a negative signed value is undefined behavior
      // before C++20, so scale by multiplication instead; the result is the
      // same for every value the shift handled correctly.
      dest[x] = rounded * (1 << bits_shift) + compound_round_offset;
    }
    src += src_stride;
    dest += pred_stride;
  }
}
// Vertical-only counterpart of Convolve2D_C for compound prediction mode.
// Every output value is the kSubPixelTaps-tap filtered sum of the source
// column starting kVerticalOffset rows above. The sum is scaled up by
// kFilterBits - inter_round_bits[0] bits, rounded by inter_round_bits[1] and
// biased by (1 << (bitdepth + 4)) + (1 << (bitdepth + 3)).
// The output is not clipped to the valid pixel range; it is stored as
// uint16_t so it can be blended with another predictor later.
// |reference_stride| is divided by sizeof(Pixel) before use, while
// |pred_stride| is applied directly to the uint16_t destination pointer.
template <int bitdepth, typename Pixel>
void ConvolveCompoundVertical_C(const void* const reference,
                                const ptrdiff_t reference_stride,
                                const int /*horizontal_filter_index*/,
                                const int vertical_filter_index,
                                const uint8_t inter_round_bits[2],
                                const int /*subpixel_x*/, const int subpixel_y,
                                const int /*step_x*/, const int /*step_y*/,
                                const int width, const int height,
                                void* prediction, const ptrdiff_t pred_stride) {
  const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
  const auto* src_row =
      static_cast<const Pixel*>(reference) - kVerticalOffset * src_stride;
  auto* dest_row = static_cast<uint16_t*>(prediction);
  const int filter_set = GetFilterIndex(vertical_filter_index, height);
  const int filter_id = (subpixel_y >> 6) & kSubPixelMask;
  const int bits_shift = kFilterBits - inter_round_bits[0];
  const int compound_round_offset =
      (1 << (bitdepth + 4)) + (1 << (bitdepth + 3));
  for (int row = 0; row < height; ++row) {
    for (int col = 0; col < width; ++col) {
      int sum = 0;
      for (int tap = 0; tap < kSubPixelTaps; ++tap) {
        sum += kSubPixelFilters[filter_set][filter_id][tap] *
               src_row[tap * src_stride + col];
      }
      const auto scaled = LeftShift(sum, bits_shift);
      dest_row[col] = RightShiftWithRounding(scaled, inter_round_bits[1]) +
                      compound_round_offset;
    }
    src_row += src_stride;
    dest_row += pred_stride;
  }
}
// This function is used when intra block copy is present.
// It is called when it is single prediction mode for U/V plane, where the
// reference block is from current frame and both horizontal and vertical
// filtering are required.
// The output is the single prediction of the block, clipped to valid pixel
// range.
template <int bitdepth, typename Pixel>
void ConvolveIntraBlockCopy2D_C(
const void* const reference, const ptrdiff_t reference_stride,
const int /*horizontal_filter_index*/, const int /*vertical_filter_index*/,
const uint8_t /*inter_round_bits*/[2], const int /*subpixel_x*/,
const int /*subpixel_y*/, const int /*step_x*/, const int /*step_y*/,
const int width, const int height, void* prediction,
const ptrdiff_t pred_stride) {
const auto* src = reinterpret_cast<const Pixel*>(reference);
const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
auto* dest = reinterpret_cast<Pixel*>(prediction);
const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
uint16_t intermediate_result[kMaxSuperBlockSizeInPixels *
(2 * kMaxSuperBlockSizeInPixels + 8)];
uint16_t* intermediate = intermediate_result;
// Note: allow vertical access to height + 1. Because this function is only
// for u/v plane of intra block copy, such access is guaranteed to be within
// the prediction block.
for (int y = 0; y <= height; ++y) {
for (int x = 0; x < width; ++x) {
intermediate[x] = src[x] + src[x + 1];
}
src += src_stride;
intermediate += width;
}
intermediate = intermediate_result;
for (int y = 0; y < height; ++y) {
for (int x = 0; x < width; ++x) {
dest[x] = static_cast<Pixel>(
RightShiftWithRounding(intermediate[x] + intermediate[x + width], 2));
}
intermediate += width;
dest += dest_stride;
}
}
// This function is used when intra block copy is present.
// It is called when it is single prediction mode for U/V plane, where the
// reference block is from the current frame and only horizontal or vertical
// filtering is required.
// The output is the single prediction of the block, clipped to valid pixel
// range.
// The filtering of intra block copy is simply the average of current and
// the next pixel.
template <int bitdepth, typename Pixel, bool is_horizontal>
void ConvolveIntraBlockCopy1D_C(
const void* const reference, const ptrdiff_t reference_stride,
const int /*horizontal_filter_index*/, const int /*vertical_filter_index*/,
const uint8_t /*inter_round_bits*/[2], const int /*subpixel_x*/,
const int /*subpixel_y*/, const int /*step_x*/, const int /*step_y*/,
const int width, const int height, void* prediction,
const ptrdiff_t pred_stride) {
const auto* src = reinterpret_cast<const Pixel*>(reference);
const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
auto* dest = reinterpret_cast<Pixel*>(prediction);
const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
const ptrdiff_t offset = is_horizontal ? 1 : src_stride;
for (int y = 0; y < height; ++y) {
for (int x = 0; x < width; ++x) {
dest[x] = static_cast<Pixel>(
RightShiftWithRounding(src[x] + src[x + offset], 1));
}
src += src_stride;
dest += dest_stride;
}
}
void Init8bpp() {
Dsp* const dsp = dsp_internal::GetWritableDspTable(8);
assert(dsp != nullptr);
dsp->convolve[0][0][0][0] = ConvolveCopy_C<8, uint8_t>;
dsp->convolve[0][0][0][1] = ConvolveHorizontal_C<8, uint8_t>;
dsp->convolve[0][0][1][0] = ConvolveVertical_C<8, uint8_t>;
dsp->convolve[0][0][1][1] = Convolve2DSingle_C<8, uint8_t>;
dsp->convolve[0][1][0][0] = ConvolveCompoundCopy_C<8, uint8_t>;
dsp->convolve[0][1][0][1] = ConvolveCompoundHorizontal_C<8, uint8_t>;
dsp->convolve[0][1][1][0] = ConvolveCompoundVertical_C<8, uint8_t>;
dsp->convolve[0][1][1][1] = Convolve2D_C<8, uint8_t>;
dsp->convolve[1][0][0][0] = ConvolveCopy_C<8, uint8_t>;
dsp->convolve[1][0][0][1] =
ConvolveIntraBlockCopy1D_C<8, uint8_t, /*is_horizontal=*/true>;
dsp->convolve[1][0][1][0] =
ConvolveIntraBlockCopy1D_C<8, uint8_t, /*is_horizontal=*/false>;
dsp->convolve[1][0][1][1] = ConvolveIntraBlockCopy2D_C<8, uint8_t>;
dsp->convolve[1][1][0][0] = nullptr;
dsp->convolve[1][1][0][1] = nullptr;
dsp->convolve[1][1][1][0] = nullptr;
dsp->convolve[1][1][1][1] = nullptr;
dsp->convolve_scale[0] = Convolve2DScaleSingle_C<8, uint8_t>;
dsp->convolve_scale[1] = Convolve2DScale_C<8, uint8_t>;
}
#if LIBGAV1_MAX_BITDEPTH >= 10
// Installs the C convolve implementations into the writable 10bpp Dsp table.
// Judging by the functions installed, the four |convolve| indices select, in
// order: intra block copy, compound prediction, vertical filtering and
// horizontal filtering. The compound intra block copy slots are left null.
void Init10bpp() {
  Dsp* const dsp = dsp_internal::GetWritableDspTable(10);
  assert(dsp != nullptr);

  // Regular (inter) prediction.
  auto& inter = dsp->convolve[0];
  inter[0][0][0] = ConvolveCopy_C<10, uint16_t>;
  inter[0][0][1] = ConvolveHorizontal_C<10, uint16_t>;
  inter[0][1][0] = ConvolveVertical_C<10, uint16_t>;
  inter[0][1][1] = Convolve2DSingle_C<10, uint16_t>;
  inter[1][0][0] = ConvolveCompoundCopy_C<10, uint16_t>;
  inter[1][0][1] = ConvolveCompoundHorizontal_C<10, uint16_t>;
  inter[1][1][0] = ConvolveCompoundVertical_C<10, uint16_t>;
  inter[1][1][1] = Convolve2D_C<10, uint16_t>;

  // Intra block copy.
  auto& intra_block_copy = dsp->convolve[1];
  intra_block_copy[0][0][0] = ConvolveCopy_C<10, uint16_t>;
  intra_block_copy[0][0][1] =
      ConvolveIntraBlockCopy1D_C<10, uint16_t, /*is_horizontal=*/true>;
  intra_block_copy[0][1][0] =
      ConvolveIntraBlockCopy1D_C<10, uint16_t, /*is_horizontal=*/false>;
  intra_block_copy[0][1][1] = ConvolveIntraBlockCopy2D_C<10, uint16_t>;
  intra_block_copy[1][0][0] = nullptr;
  intra_block_copy[1][0][1] = nullptr;
  intra_block_copy[1][1][0] = nullptr;
  intra_block_copy[1][1][1] = nullptr;

  // Scaled prediction: [0] is the single variant, [1] the uint16_t-output
  // variant.
  dsp->convolve_scale[0] = Convolve2DScaleSingle_C<10, uint16_t>;
  dsp->convolve_scale[1] = Convolve2DScale_C<10, uint16_t>;
}
#endif
} // namespace
// Registers the C convolve functions defined in this file: always for 8bpp,
// and additionally for 10bpp when LIBGAV1_MAX_BITDEPTH is at least 10.
void ConvolveInit_C() {
  Init8bpp();
#if LIBGAV1_MAX_BITDEPTH >= 10
  Init10bpp();
#endif
}
} // namespace dsp
} // namespace libgav1