blob: 6d5ef3182820568cc5657e17a75c1b44a6d1bc72 [file] [log] [blame]
// Copyright 2019 The libgav1 Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "src/post_filter.h"
#include <algorithm>
#include <atomic>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include "src/dsp/constants.h"
#include "src/dsp/dsp.h"
#include "src/utils/array_2d.h"
#include "src/utils/constants.h"
#include "src/utils/memory.h"
#include "src/utils/types.h"
namespace libgav1 {
namespace {
// Import all the constants in the anonymous namespace.
#include "src/post_filter/deblock_thresholds.inc"
// Row indices of deblocked pixels needed by loop restoration. This is used to
// populate the |deblock_buffer_| when cdef is on. The first dimension is
// subsampling_y (0 or 1) and the second dimension lists the four rows that are
// saved per call. CopyDeblockedPixels() treats each entry as a row offset from
// the source row selected by its |row4x4| argument.
constexpr int kDeblockedRowsForLoopRestoration[2][4] = {{54, 55, 56, 57},
{26, 27, 28, 29}};
} // namespace
// The following example illustrates how ExtendFrame() extends a frame.
// Suppose the frame width is 8 and height is 4, and left, right, top, and
// bottom are all equal to 3.
//
// Before:
//
// ABCDEFGH
// IJKLMNOP
// QRSTUVWX
// YZabcdef
//
// After:
//
// AAA|ABCDEFGH|HHH [3]
// AAA|ABCDEFGH|HHH
// AAA|ABCDEFGH|HHH
// ---+--------+---
// AAA|ABCDEFGH|HHH [1]
// III|IJKLMNOP|PPP
// QQQ|QRSTUVWX|XXX
// YYY|YZabcdef|fff
// ---+--------+---
// YYY|YZabcdef|fff [2]
// YYY|YZabcdef|fff
// YYY|YZabcdef|fff
//
// ExtendFrame() first extends the rows to the left and to the right[1]. Then
// it copies the extended last row to the bottom borders[2]. Finally it copies
// the extended first row to the top borders[3].
// static
template <typename Pixel>
void PostFilter::ExtendFrame(Pixel* const frame_start, const int width,
                             const int height, const ptrdiff_t stride,
                             const int left, const int right, const int top,
                             const int bottom) {
  // Pass 1: pad every row horizontally by replicating its first pixel into
  // the |left| border and its last pixel into the |right| border.
  Pixel* row = frame_start;
  for (int y = 0; y < height; ++y, row += stride) {
    Memset(row - left, row[0], left);
    Memset(row + width, row[width - 1], right);
  }
  // |row| has now advanced to the first row below the frame.
  assert(row == frame_start + height * stride);

  // The vertical passes below copy whole |stride|-sized rows (which may
  // include some padding pixels) because that is cheaper than copying exactly
  // left + width + right pixels.
  const size_t row_size = sizeof(Pixel) * stride;

  // Pass 2: fill the bottom border with copies of the extended last row. Each
  // copy is positioned so that it ends at the last pixel of the right border:
  //
  //   |<--- stride --->|
  //   **YYY|YZabcdef|fff   <-- extended last row (copy source)
  //   -----+--------+---
  //   **YYY|YZabcdef|fff
  //   **YYY|YZabcdef|fff
  //   **YYY|YZabcdef|fff   <-- ends at the bottom right border pixel
  //
  // The asterisks indicate padding pixels.
  Pixel* bottom_dst = row + (width + right) - stride;
  const Pixel* const last_row = bottom_dst - stride;
  for (int y = 0; y < bottom; ++y, bottom_dst += stride) {
    memcpy(bottom_dst, last_row, row_size);
  }

  // Pass 3: fill the top border with copies of the extended first row. Each
  // copy is positioned so that it starts at the first pixel of the left
  // border:
  //
  //   +-- top left border pixel
  //   |
  //   v
  //   AAA|ABCDEFGH|HHH**
  //   AAA|ABCDEFGH|HHH**
  //   AAA|ABCDEFGH|HHH**
  //   ---+--------+-----
  //   AAA|ABCDEFGH|HHH**   <-- extended first row (copy source)
  //   |<--- stride --->|
  const Pixel* const first_row = frame_start - left;
  Pixel* top_dst = frame_start - left - top * stride;
  for (int y = 0; y < top; ++y, top_dst += stride) {
    memcpy(top_dst, first_row, row_size);
  }
}
// Explicit instantiations of ExtendFrame() for the supported pixel types. The
// 16-bit variant is only emitted when high bitdepth support is compiled in.
template void PostFilter::ExtendFrame<uint8_t>(uint8_t* frame_start, int width,
                                               int height, ptrdiff_t stride,
                                               int left, int right, int top,
                                               int bottom);
#if LIBGAV1_MAX_BITDEPTH >= 10
template void PostFilter::ExtendFrame<uint16_t>(uint16_t* frame_start,
                                                int width, int height,
                                                ptrdiff_t stride, int left,
                                                int right, int top,
                                                int bottom);
#endif
// Captures the per-frame state used by the post filters, computes the deblock
// filter levels and (when superres is on) the per-plane superres parameters,
// and sets up the per-plane start pointers used by each filter stage.
PostFilter::PostFilter(const ObuFrameHeader& frame_header,
                       const ObuSequenceHeader& sequence_header,
                       FrameScratchBuffer* const frame_scratch_buffer,
                       YuvBuffer* const frame_buffer, const dsp::Dsp* dsp,
                       int do_post_filter_mask)
    : frame_header_(frame_header),
      loop_restoration_(frame_header.loop_restoration),
      dsp_(*dsp),
      // Deblocking filter always uses 64x64 as step size.
      num_64x64_blocks_per_row_(DivideBy64(frame_header.width + 63)),
      upscaled_width_(frame_header.upscaled_width),
      width_(frame_header.width),
      height_(frame_header.height),
      bitdepth_(sequence_header.color_config.bitdepth),
      subsampling_x_{0, sequence_header.color_config.subsampling_x,
                     sequence_header.color_config.subsampling_x},
      subsampling_y_{0, sequence_header.color_config.subsampling_y,
                     sequence_header.color_config.subsampling_y},
      planes_(sequence_header.color_config.is_monochrome ? kMaxPlanesMonochrome
                                                         : kMaxPlanes),
      pixel_size_(static_cast<int>((bitdepth_ == 8) ? sizeof(uint8_t)
                                                    : sizeof(uint16_t))),
      inner_thresh_(kInnerThresh[frame_header.loop_filter.sharpness]),
      outer_thresh_(kOuterThresh[frame_header.loop_filter.sharpness]),
      needs_chroma_deblock_(frame_header.loop_filter.level[kPlaneU + 1] != 0 ||
                            frame_header.loop_filter.level[kPlaneV + 1] != 0),
      cdef_index_(frame_scratch_buffer->cdef_index),
      inter_transform_sizes_(frame_scratch_buffer->inter_transform_sizes),
      threaded_window_buffer_(
          frame_scratch_buffer->threaded_window_buffer.get()),
      restoration_info_(&frame_scratch_buffer->loop_restoration_info),
      superres_line_buffer_(frame_scratch_buffer->superres_line_buffer.get()),
      block_parameters_(frame_scratch_buffer->block_parameters_holder),
      frame_buffer_(*frame_buffer),
      deblock_buffer_(frame_scratch_buffer->deblock_buffer),
      do_post_filter_mask_(do_post_filter_mask),
      thread_pool_(
          frame_scratch_buffer->threading_strategy.post_filter_thread_pool()),
      window_buffer_width_(GetWindowBufferWidth(thread_pool_, frame_header)),
      window_buffer_height_(GetWindowBufferHeight(thread_pool_, frame_header)) {
  // The deblock filter levels are computed with all loop filter deltas zeroed.
  const int8_t zero_delta_lf[kFrameLfCount] = {};
  ComputeDeblockFilterLevels(zero_delta_lf, deblock_filter_levels_);

  if (DoSuperRes()) {
    // Precompute, for every plane, the horizontal step and the initial
    // subpixel position used when upscaling a row.
    for (int plane = 0; plane < planes_; ++plane) {
      auto& info = super_res_info_[plane];
      const int downscaled =
          RightShiftWithRounding(width_, subsampling_x_[plane]);
      const int upscaled =
          RightShiftWithRounding(upscaled_width_, subsampling_x_[plane]);
      const int scaled_source_width = downscaled << kSuperResScaleBits;
      info.step = (scaled_source_width + upscaled / 2) / upscaled;
      // Accumulated rounding error of |step| over the whole upscaled row.
      const int rounding_error = info.step * upscaled - scaled_source_width;
      const auto centering_offset =
          (-((upscaled - downscaled) << (kSuperResScaleBits - 1)) +
           DivideBy2(upscaled)) /
          upscaled;
      info.initial_subpixel_x =
          (centering_offset + (1 << (kSuperResExtraBits - 1)) -
           rounding_error / 2) &
          kSuperResScaleMask;
      info.upscaled_width = upscaled;
    }
  }

  // By default every stage operates directly at the start of the frame buffer.
  for (int plane = 0; plane < planes_; ++plane) {
    auto* const plane_start = frame_buffer_.data(plane);
    source_buffer_[plane] = plane_start;
    cdef_buffer_[plane] = plane_start;
    superres_buffer_[plane] = plane_start;
    loop_restoration_buffer_[plane] = plane_start;
  }

  // In single threaded mode, we apply SuperRes without making a copy of the
  // input row by writing the output to one row to the top (we refer to this
  // process as "in place superres" in our code).
  const bool in_place_superres = DoSuperRes() && thread_pool_ == nullptr;
  if (!DoCdef() && !DoRestoration() && !in_place_superres) return;

  // Otherwise the earlier stages are shifted down/right inside the frame
  // buffer. The shifts accumulate from the last stage (loop restoration)
  // back to the first one (the source buffer).
  for (int plane = 0; plane < planes_; ++plane) {
    const auto stride = frame_buffer_.stride(plane);
    int horizontal_shift = 0;
    int vertical_shift = 0;
    // Byte offset that corresponds to the shifts accumulated so far.
    const auto shift_in_bytes = [&]() {
      return vertical_shift * stride + horizontal_shift * pixel_size_;
    };

    const bool restore_plane =
        DoRestoration() &&
        loop_restoration_.type[plane] != kLoopRestorationTypeNone;
    if (restore_plane) {
      horizontal_shift += frame_buffer_.alignment();
      if (!DoCdef()) vertical_shift += kRestorationVerticalBorder;
      superres_buffer_[plane] += shift_in_bytes();
    }

    if (in_place_superres) vertical_shift += kSuperResVerticalBorder;
    cdef_buffer_[plane] += shift_in_bytes();

    if (DoCdef()) {
      horizontal_shift += frame_buffer_.alignment();
      vertical_shift += kCdefBorder;
    }
    // The shifted data must still fit inside the frame's borders.
    assert(horizontal_shift <= frame_buffer_.right_border(plane));
    assert(vertical_shift <= frame_buffer_.bottom_border(plane));
    source_buffer_[plane] += shift_in_bytes();
  }
}
// Dispatches to the ExtendFrame() instantiation that matches the bitdepth of
// the frame. |stride| is expressed in bytes; it is converted to a stride in
// pixels for the 16-bit path.
void PostFilter::ExtendFrameBoundary(uint8_t* const frame_start,
                                     const int width, const int height,
                                     const ptrdiff_t stride, const int left,
                                     const int right, const int top,
                                     const int bottom) const {
#if LIBGAV1_MAX_BITDEPTH >= 10
  const bool is_high_bitdepth = bitdepth_ >= 10;
  if (is_high_bitdepth) {
    auto* const frame_start16 = reinterpret_cast<uint16_t*>(frame_start);
    const ptrdiff_t stride_in_pixels = stride / sizeof(uint16_t);
    ExtendFrame<uint16_t>(frame_start16, width, height, stride_in_pixels, left,
                          right, top, bottom);
    return;
  }
#endif
  ExtendFrame<uint8_t>(frame_start, width, height, stride, left, right, top,
                       bottom);
}
void PostFilter::ExtendBordersForReferenceFrame() {
if (frame_header_.refresh_frame_flags == 0) return;
for (int plane = kPlaneY; plane < planes_; ++plane) {
const int plane_width =
RightShiftWithRounding(upscaled_width_, subsampling_x_[plane]);
const int plane_height =
RightShiftWithRounding(height_, subsampling_y_[plane]);
assert(frame_buffer_.left_border(plane) >= kMinLeftBorderPixels &&
frame_buffer_.right_border(plane) >= kMinRightBorderPixels &&
frame_buffer_.top_border(plane) >= kMinTopBorderPixels &&
frame_buffer_.bottom_border(plane) >= kMinBottomBorderPixels);
// plane subsampling_x_ left_border
// Y N/A 64, 48
// U,V 0 64, 48
// U,V 1 32, 16
assert(frame_buffer_.left_border(plane) >= 16);
// The |left| argument to ExtendFrameBoundary() must be at least
// kMinLeftBorderPixels (13) for warp.
static_assert(16 >= kMinLeftBorderPixels, "");
ExtendFrameBoundary(
frame_buffer_.data(plane), plane_width, plane_height,
frame_buffer_.stride(plane), frame_buffer_.left_border(plane),
frame_buffer_.right_border(plane), frame_buffer_.top_border(plane),
frame_buffer_.bottom_border(plane));
}
}
// Saves the four deblocked rows listed in kDeblockedRowsForLoopRestoration
// (relative to the source row selected by |row4x4|) from |plane| of the source
// buffer into |deblock_buffer_|. Rows that fall below the bottom of the plane
// are replaced by the last valid row that was copied.
void PostFilter::CopyDeblockedPixels(Plane plane, int row4x4) {
  const auto subsampling_y = subsampling_y_[plane];
  const int plane_height =
      SubsampledValue(frame_header_.height, subsampling_y);
  const int bytes_per_row =
      SubsampledValue(MultiplyBy4(frame_header_.columns4x4),
                      subsampling_x_[plane]) *
      pixel_size_;

  const uint8_t* const src = GetSourceBuffer(plane, row4x4, 0);
  const ptrdiff_t src_stride = frame_buffer_.stride(plane);
  const ptrdiff_t dst_stride = deblock_buffer_.stride(plane);
  // Four destination rows are used for every four 4x4 rows of the frame.
  uint8_t* dst = deblock_buffer_.data(plane) + dst_stride * DivideBy4(row4x4);

  // Row of the plane that |src| corresponds to.
  const int first_row = MultiplyBy4(row4x4) >> subsampling_y;
  int last_valid_row = -1;
  for (const int row_offset : kDeblockedRowsForLoopRestoration[subsampling_y]) {
    int row = row_offset;
    if (first_row + row >= plane_height) {
      if (last_valid_row == -1) {
        // We have run out of rows and there is no valid row to copy. This will
        // not be used by loop restoration, so we can simply break here.
        // However, MSAN does not know that this is never used (since we
        // sometimes apply superres to this row as well). So zero it out in
        // case of MSAN.
#if LIBGAV1_MSAN
        if (DoSuperRes()) {
          memset(dst, 0, bytes_per_row);
          dst += dst_stride;
          continue;
        }
#endif
        break;
      }
      // Past the bottom of the plane: repeat the last valid row (mimics the
      // bottom border extension).
      row = last_valid_row;
    }
    memcpy(dst, src + src_stride * row, bytes_per_row);
    last_valid_row = row;
    dst += dst_stride;
  }
}
// Extends the borders of the rows that became final after processing the
// superblock row that starts at |row4x4| and is |sb4x4| 4x4 units tall.
//
// When |for_loop_restoration| is true the rows are taken from
// |superres_buffer_| and only the (smaller) borders needed by loop restoration
// are filled in. Otherwise the rows are taken from |frame_buffer_|, the full
// frame borders are filled in, and |progress_row_| is updated from the Y
// plane.
//
// The top border is only written for the first row of a plane and the bottom
// border only for a range that reaches the last row of the plane.
void PostFilter::CopyBordersForOneSuperBlockRow(int row4x4, int sb4x4,
                                                bool for_loop_restoration) {
  // Number of rows to be subtracted from the start position described by
  // row4x4. We always lag by 8 rows (to account for in-loop post filters).
  const int row_offset = (row4x4 == 0) ? 0 : 8;
  // Number of rows to be subtracted from the height described by sb4x4.
  const int height_offset = (row4x4 == 0) ? 8 : 0;
  // If cdef is off, then loop restoration needs 2 extra rows for the bottom
  // border in each plane.
  const int extra_rows = (for_loop_restoration && !DoCdef()) ? 2 : 0;
  for (int plane = 0; plane < planes_; ++plane) {
    const int plane_width =
        RightShiftWithRounding(upscaled_width_, subsampling_x_[plane]);
    const int plane_height =
        RightShiftWithRounding(height_, subsampling_y_[plane]);
    const int row = (MultiplyBy4(row4x4) - row_offset) >> subsampling_y_[plane];
    assert(row >= 0);
    // Nothing left to extend in this plane (or in the remaining ones).
    if (row >= plane_height) break;
    // Clamp the number of rows so that we never go past the end of the plane.
    const int num_rows =
        std::min(RightShiftWithRounding(MultiplyBy4(sb4x4) - height_offset,
                                        subsampling_y_[plane]) +
                     extra_rows,
                 plane_height - row);
    // We only need to track the progress of the Y plane since the progress of
    // the U and V planes will be inferred from the progress of the Y plane.
    if (!for_loop_restoration && plane == kPlaneY) {
      progress_row_ = row + num_rows;
    }
    const bool copy_bottom = row + num_rows == plane_height;
    // Keep the stride in ptrdiff_t so that |row * stride| below is evaluated
    // in pointer-difference width rather than in int. This matches the
    // |stride| parameter of ExtendFrameBoundary().
    const ptrdiff_t stride = frame_buffer_.stride(plane);
    uint8_t* const start = (for_loop_restoration ? superres_buffer_[plane]
                                                 : frame_buffer_.data(plane)) +
                           row * stride;
    const int left_border = for_loop_restoration
                                ? kRestorationHorizontalBorder
                                : frame_buffer_.left_border(plane);
    const int right_border = for_loop_restoration
                                 ? kRestorationHorizontalBorder
                                 : frame_buffer_.right_border(plane);
    const int top_border =
        (row == 0) ? (for_loop_restoration ? kRestorationVerticalBorder
                                           : frame_buffer_.top_border(plane))
                   : 0;
    const int bottom_border =
        copy_bottom
            ? (for_loop_restoration ? kRestorationVerticalBorder
                                    : frame_buffer_.bottom_border(plane))
            : 0;
    ExtendFrameBoundary(start, plane_width, num_rows, stride, left_border,
                        right_border, top_border, bottom_border);
  }
}
// Runs every enabled post filter over the whole frame, one filter stage at a
// time, and then extends the frame borders.
void PostFilter::ApplyFilteringThreaded() {
  if (DoDeblock()) {
    ApplyDeblockFilterThreaded();
  }
  // When both cdef and loop restoration are enabled, the deblock buffer is set
  // up for the whole frame before cdef runs.
  if (DoCdef() && DoRestoration()) {
    int row4x4 = 0;
    while (row4x4 < frame_header_.rows4x4) {
      SetupDeblockBuffer(row4x4, kNum4x4InLoopFilterUnit);
      row4x4 += kNum4x4InLoopFilterUnit;
    }
  }
  if (DoCdef()) {
    ApplyCdef();
  }
  if (DoSuperRes()) {
    ApplySuperResThreaded();
  }
  if (DoRestoration()) {
    ApplyLoopRestoration();
  }
  ExtendBordersForReferenceFrame();
}
// Runs every enabled post filter on the superblock row that starts at |row4x4|
// and is |sb4x4| 4x4 units tall. Deblocking is skipped when |do_deblock| is
// false.
//
// Returns -1 if |row4x4| is negative, |height_| if |is_last_row| is true, and
// |progress_row_| otherwise.
int PostFilter::ApplyFilteringForOneSuperBlockRow(int row4x4, int sb4x4,
                                                  bool is_last_row,
                                                  bool do_deblock) {
  if (row4x4 < 0) return -1;

  // Start and height (in 4x4 units) of the extra pass that is run after the
  // last superblock row to cover the rows that lag behind.
  const int trailing_row4x4 = row4x4 + sb4x4;
  constexpr int kTrailingHeight4x4 = 16;

  if (DoDeblock() && do_deblock) {
    ApplyDeblockFilterForOneSuperBlockRow(row4x4, sb4x4);
  }
  if (DoRestoration() && DoCdef()) {
    SetupDeblockBuffer(row4x4, sb4x4);
  }
  if (DoCdef()) {
    ApplyCdefForOneSuperBlockRow(row4x4, sb4x4, is_last_row);
  }
  if (DoSuperRes()) {
    ApplySuperResForOneSuperBlockRow(row4x4, sb4x4, is_last_row);
  }
  if (DoRestoration()) {
    CopyBordersForOneSuperBlockRow(row4x4, sb4x4,
                                   /*for_loop_restoration=*/true);
    ApplyLoopRestoration(row4x4, sb4x4);
    if (is_last_row) {
      // Loop restoration operates with a lag of 8 rows. So make sure to cover
      // all the rows of the last superblock row.
      CopyBordersForOneSuperBlockRow(trailing_row4x4, kTrailingHeight4x4,
                                     /*for_loop_restoration=*/true);
      ApplyLoopRestoration(trailing_row4x4, kTrailingHeight4x4);
    }
  }
  // Border extension is either done incrementally here, or once for the whole
  // frame after the last superblock row.
  if (frame_header_.refresh_frame_flags != 0 && DoBorderExtensionInLoop()) {
    CopyBordersForOneSuperBlockRow(row4x4, sb4x4,
                                   /*for_loop_restoration=*/false);
    if (is_last_row) {
      CopyBordersForOneSuperBlockRow(trailing_row4x4, kTrailingHeight4x4,
                                     /*for_loop_restoration=*/false);
    }
  }
  if (is_last_row && !DoBorderExtensionInLoop()) {
    ExtendBordersForReferenceFrame();
  }

  if (is_last_row) return height_;
  return progress_row_;
}
} // namespace libgav1