// blob: 554e53703b403c4eca1bc2477326108d92832512 [file] [log] [blame]
// Copyright 2020 The libgav1 Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "src/post_filter.h"
#include "src/utils/blocking_counter.h"
namespace libgav1 {
// Runs the DSP SuperRes kernel on each plane, starting at kPlaneY and
// continuing while the plane index is below |planes_|.
//
// |src|  - per-plane pointer to the first input row. The input stride is
//          always taken from |frame_buffer_|.
// |rows| - per-plane number of rows to process directly from |src|. The
//          kernel is skipped for a plane when this is not positive.
// |line_buffer_row| - when non-negative, one additional row is read from row
//          |line_buffer_row| of |superres_line_buffer_| and written to the
//          output row located |rows[plane]| rows past |dst[plane]|.
// |dst|  - per-plane pointer to the first output row.
// |dst_is_loop_restoration_border| - selects the output stride:
//          |loop_restoration_border_| when true, |frame_buffer_| otherwise.
void PostFilter::ApplySuperRes(const std::array<uint8_t*, kMaxPlanes>& src,
                               const std::array<int, kMaxPlanes>& rows,
                               const int line_buffer_row,
                               const std::array<uint8_t*, kMaxPlanes>& dst,
                               bool dst_is_loop_restoration_border /*=false*/) {
  const bool has_line_buffer_row = line_buffer_row >= 0;
  int plane = kPlaneY;
  do {
    // Values shared by the 8-bit and the high bitdepth paths.
    const int num_rows = rows[plane];
    const int downscaled_width =
        MultiplyBy4(frame_header_.columns4x4) >> subsampling_x_[plane];
    // Index 0 is used for the first plane, index 1 for every other plane.
    const auto& coefficients =
        superres_coefficients_[static_cast<int>(plane != 0)];
    const auto& info = super_res_info_[plane];
    // Strides as returned by the buffers; the high bitdepth path converts
    // them to uint16_t units below.
    const ptrdiff_t src_stride_bytes = frame_buffer_.stride(plane);
    const ptrdiff_t dst_stride_bytes =
        dst_is_loop_restoration_border ? loop_restoration_border_.stride(plane)
                                       : frame_buffer_.stride(plane);
#if LIBGAV1_MAX_BITDEPTH >= 10
    if (bitdepth_ >= 10) {
      const ptrdiff_t src_stride = src_stride_bytes / sizeof(uint16_t);
      const ptrdiff_t dst_stride = dst_stride_bytes / sizeof(uint16_t);
      auto* const src16 = reinterpret_cast<uint16_t*>(src[plane]);
      auto* const dst16 = reinterpret_cast<uint16_t*>(dst[plane]);
      if (num_rows > 0) {
        dsp_.super_res(coefficients, src16, src_stride, num_rows,
                       downscaled_width, info.upscaled_width,
                       info.initial_subpixel_x, info.step, dst16, dst_stride);
      }
      // In the multi-threaded case, the |superres_line_buffer_| holds the last
      // input row. Apply SuperRes for that row.
      if (has_line_buffer_row) {
        auto* const saved_row =
            reinterpret_cast<uint16_t*>(superres_line_buffer_.data(plane)) +
            line_buffer_row * superres_line_buffer_.stride(plane) /
                sizeof(uint16_t) +
            kSuperResHorizontalBorder;
        // A single row is processed, so both strides are passed as 0.
        dsp_.super_res(coefficients, saved_row, /*source_stride=*/0,
                       /*height=*/1, downscaled_width, info.upscaled_width,
                       info.initial_subpixel_x, info.step,
                       dst16 + num_rows * dst_stride, /*dest_stride=*/0);
      }
      continue;
    }
#endif  // LIBGAV1_MAX_BITDEPTH >= 10
    uint8_t* const src8 = src[plane];
    uint8_t* const dst8 = dst[plane];
    if (num_rows > 0) {
      dsp_.super_res(coefficients, src8, src_stride_bytes, num_rows,
                     downscaled_width, info.upscaled_width,
                     info.initial_subpixel_x, info.step, dst8,
                     dst_stride_bytes);
    }
    // In the multi-threaded case, the |superres_line_buffer_| holds the last
    // input row. Apply SuperRes for that row.
    if (has_line_buffer_row) {
      uint8_t* const saved_row =
          superres_line_buffer_.data(plane) +
          line_buffer_row * superres_line_buffer_.stride(plane) +
          kSuperResHorizontalBorder;
      // A single row is processed, so both strides are passed as 0.
      dsp_.super_res(coefficients, saved_row, /*source_stride=*/0,
                     /*height=*/1, downscaled_width, info.upscaled_width,
                     info.initial_subpixel_x, info.step,
                     dst8 + num_rows * dst_stride_bytes, /*dest_stride=*/0);
    }
  } while (++plane < planes_);
}
// Applies SuperRes (single-threaded, no line buffer) for the superblock row
// that begins at |row4x4_start| and covers at most |sb4x4| 4x4 rows, reading
// from |cdef_buffer_| and writing to |superres_buffer_|.
//
// Unless |is_last_row| is set, the final two 4x4 rows of the superblock row
// are not processed by this call. When |row4x4_start| is positive, the call
// instead begins two 4x4 rows earlier so that the rows skipped for the
// previous superblock row are handled here.
void PostFilter::ApplySuperResForOneSuperBlockRow(int row4x4_start, int sb4x4,
                                                  bool is_last_row) {
  assert(row4x4_start >= 0);
  assert(DoSuperRes());
  // If not doing cdef, then LR needs two rows of border with superres applied.
  const int num_rows_extra = (DoCdef() || !DoRestoration()) ? 0 : 2;
  const int remaining_rows4x4 = frame_header_.rows4x4 - row4x4_start;
  const int num_rows4x4 =
      std::min(sb4x4, remaining_rows4x4) - (is_last_row ? 0 : 2);
  // True when there is a previous superblock row whose trailing rows still
  // need SuperRes.
  const bool covers_previous_rows = row4x4_start > 0;
  const int first_row4x4 =
      covers_previous_rows ? row4x4_start - 2 : row4x4_start;
  // Extra rows processed past the end of the current range; none are needed
  // for the last superblock row.
  const int trailing_rows = is_last_row ? 0 : num_rows_extra;

  std::array<uint8_t*, kMaxPlanes> src;
  std::array<uint8_t*, kMaxPlanes> dst;
  std::array<int, kMaxPlanes> rows;
  int plane = kPlaneY;
  do {
    const int shift_y = subsampling_y_[plane];
    // Note that every |num_rows_extra| adjustment is done after the value is
    // subsampled since we always need to work on |num_rows_extra| extra rows
    // irrespective of the plane subsampling.
    int first_row = MultiplyBy4(first_row4x4) >> shift_y;
    // Rows of the current superblock row (except for the last
    // 8-|num_rows_extra| rows unless this is the last row).
    int num_rows = (MultiplyBy4(num_rows4x4) >> shift_y) + trailing_rows;
    if (covers_previous_rows) {
      // Start |num_rows_extra| rows later and add the last
      // 8-|num_rows_extra| rows of the previous superblock.
      first_row += num_rows_extra;
      num_rows += (8 >> shift_y) - num_rows_extra;
    }
    const ptrdiff_t row_offset = first_row * frame_buffer_.stride(plane);
    src[plane] = cdef_buffer_[plane] + row_offset;
    dst[plane] = superres_buffer_[plane] + row_offset;
    rows[plane] = num_rows;
  } while (++plane < planes_);
  ApplySuperRes(src, rows, /*line_buffer_row=*/-1, dst);
}
// Splits the frame into horizontal bands of rows and applies SuperRes to each
// band. All bands but the last are scheduled on |thread_pool_|; the last band
// is processed on the calling thread. Before a band is dispatched, its final
// input row of every plane is copied into |superres_line_buffer_| (one line
// buffer row per band) and ApplySuperRes() reads that row from the line buffer
// instead of from the source. The function returns only after all scheduled
// jobs have finished.
void PostFilter::ApplySuperResThreaded() {
  const bool chroma_is_vertically_subsampled = subsampling_y_[kPlaneU] != 0;
  int num_threads = thread_pool_->num_threads() + 1;
  // The number of rows that will be processed by each thread in the thread pool
  // (other than the current thread).
  const int even_share = height_ / num_threads;
  int thread_pool_rows = std::max(even_share, 1);
  // Make rows of Y plane even when there is subsampling for the other planes.
  if (chroma_is_vertically_subsampled && (thread_pool_rows & 1) != 0) {
    ++thread_pool_rows;
  }
  // Adjust the number of threads to what we really need.
  num_threads = Clip3(height_ / thread_pool_rows, 1, num_threads);
  const int last_band = num_threads - 1;
  // For the current thread, we round up to process all the remaining rows.
  int current_thread_rows = height_ - thread_pool_rows * last_band;
  // Make rows of Y plane even when there is subsampling for the other planes.
  if (chroma_is_vertically_subsampled && (current_thread_rows & 1) != 0) {
    ++current_thread_rows;
  }
  assert(current_thread_rows > 0);
  BlockingCounter pending_workers(last_band);
  const int pixel_size_log2 = pixel_size_log2_;
  for (int band = 0; band < num_threads; ++band) {
    const int row_start = band * thread_pool_rows;
    const bool runs_on_thread_pool = band < last_band;
    const int band_rows =
        runs_on_thread_pool ? thread_pool_rows : current_thread_rows;
    std::array<uint8_t*, kMaxPlanes> src;
    std::array<uint8_t*, kMaxPlanes> dst;
    std::array<int, kMaxPlanes> rows;
    int plane = kPlaneY;
    do {
      const auto plane_id = static_cast<Plane>(plane);
      src[plane] = GetBufferOffset(cdef_buffer_[plane],
                                   frame_buffer_.stride(plane), plane_id,
                                   row_start, 0);
      dst[plane] = GetBufferOffset(superres_buffer_[plane],
                                   frame_buffer_.stride(plane), plane_id,
                                   row_start, 0);
      // The band's last row is handled through the line buffer, so it is
      // excluded from the count of rows read directly from |src|.
      rows[plane] = (band_rows >> subsampling_y_[plane]) - 1;
      // Save the band's last input row into line buffer row |band|.
      const int plane_width =
          MultiplyBy4(frame_header_.columns4x4) >> subsampling_x_[plane];
      uint8_t* const last_input_row =
          src[plane] + rows[plane] * frame_buffer_.stride(plane);
      uint8_t* const saved_row =
          superres_line_buffer_.data(plane) +
          band * superres_line_buffer_.stride(plane) +
          (kSuperResHorizontalBorder << pixel_size_log2);
      memcpy(saved_row, last_input_row, plane_width << pixel_size_log2);
    } while (++plane < planes_);
    if (!runs_on_thread_pool) {
      // The final band is processed on the calling thread.
      ApplySuperRes(src, rows, band, dst);
      continue;
    }
    // The arrays are captured by value because they go out of scope at the end
    // of this iteration. |pending_workers| outlives the jobs since Wait() is
    // called below before it is destroyed.
    thread_pool_->Schedule([this, src, dst, rows, band, &pending_workers]() {
      ApplySuperRes(src, rows, band, dst);
      pending_workers.Decrement();
    });
  }
  // Wait for the threadpool jobs to finish.
  pending_workers.Wait();
}
} // namespace libgav1