Consider quantization factor in plane-wise filter.
In the plane-wise temporal filtering strategy, the filter weight is
assigned independently of the quantization factor used for compression.
However, when a small q is used (i.e., at high bitrate), weaker
filtering is desirable, since we would like to preserve more detail.
This CL improves the plane-wise strategy by taking the quantization
factor into account when assigning filter weights. Specifically, the
denominator of the weight exponent is multiplied by min(q^2 / 256, 1).
When q is large enough (>= 16), the factor is 1 and nothing changes.
When q is less than 16, the filtering strength is reduced according to
the q-value. This change significantly improves performance for
high-bitrate encoding.
NOTE: This CL only affects the performance on midres and hdres datasets.
Experimental results:
Under Speed-4 (two-pass mode):
           avg PSNR   ovr PSNR   SSIM
midres     -0.116     -0.099     -0.040
midres2    -0.047     -0.044     -0.017
hdres      -0.142     -0.197     -0.075
hdres2     -0.010     -0.012     -0.004
Under Speed-1 (two-pass mode):
           avg PSNR   ovr PSNR   SSIM
midres     -0.132     -0.124     -0.040
midres2    -0.057     -0.053     -0.020
hdres      -0.158     -0.177     -0.066
hdres2     -0.014     -0.018     -0.005
STATS_CHANGED
Change-Id: I486e66770a4454fb1f72c3276cd20a4b3ae3dd3d
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index f78bde4..296c6c5 100644
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -289,7 +289,7 @@
}
if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
- add_proto qw/void av1_apply_temporal_filter_planewise/, "const struct yv12_buffer_config *ref_frame, const struct macroblockd *mbd, const BLOCK_SIZE block_size, const int mb_row, const int mb_col, const int num_planes, const double *noise_levels, const int use_subblock, const int block_mse, const int *subblock_mses, const uint8_t *pred, uint32_t *accum, uint16_t *count";
+ add_proto qw/void av1_apply_temporal_filter_planewise/, "const struct yv12_buffer_config *ref_frame, const struct macroblockd *mbd, const BLOCK_SIZE block_size, const int mb_row, const int mb_col, const int num_planes, const double *noise_levels, const int use_subblock, const int block_mse, const int *subblock_mses, const int q_factor, const uint8_t *pred, uint32_t *accum, uint16_t *count";
specialize qw/av1_apply_temporal_filter_planewise sse2 avx2/;
}
add_proto qw/void av1_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale";
diff --git a/av1/encoder/temporal_filter.c b/av1/encoder/temporal_filter.c
index 37487c3..583cfb7 100644
--- a/av1/encoder/temporal_filter.c
+++ b/av1/encoder/temporal_filter.c
@@ -645,6 +645,8 @@
// use_subblock: Whether to use 4 sub-blocks to replace the original block.
// block_mse: Motion search error (MSE) for the entire block.
// subblock_mses: Pointer to the search errors (MSE) for 4 sub-blocks.
+// q_factor: Quantization factor. This is actually the `q` defined in libaom,
+// which is converted from `qindex`.
// pred: Pointer to the well-built predictors.
// accum: Pointer to the pixel-wise accumulator for filtering.
// count: Pointer to the pixel-wise counter fot filtering.
@@ -655,8 +657,8 @@
const YV12_BUFFER_CONFIG *frame_to_filter, const MACROBLOCKD *mbd,
const BLOCK_SIZE block_size, const int mb_row, const int mb_col,
const int num_planes, const double *noise_levels, const int use_subblock,
- const int block_mse, const int *subblock_mses, const uint8_t *pred,
- uint32_t *accum, uint16_t *count) {
+ const int block_mse, const int *subblock_mses, const int q_factor,
+ const uint8_t *pred, uint32_t *accum, uint16_t *count) {
assert(num_planes >= 1 && num_planes <= MAX_MB_PLANE);
// Block information.
@@ -747,10 +749,11 @@
// Control factor for non-local mean approach.
const double r =
(double)decay_control * (0.7 + log(noise_levels[plane] + 1.0));
+ const double q = AOMMIN((double)(q_factor * q_factor) / 256.0, 1);
// Compute filter weight.
const double scaled_diff =
- AOMMAX(-(window_error + block_error / 10) / (2 * r * r), -15.0);
+ AOMMAX(-(window_error + block_error / 10) / (2 * r * r * q), -15.0);
const int adjusted_weight =
(int)(exp(scaled_diff) * TF_PLANEWISE_FILTER_WEIGHT_SCALE);
@@ -792,6 +795,7 @@
// strategy)
// block_mse: Motion search error (MSE) for the entire block.
// subblock_mses: Pointer to the search errors (MSE) for 4 sub-blocks.
+// q_factor: Quantization factor.
// pred: Pointer to the well-built predictors.
// accum: Pointer to the pixel-wise accumulator for filtering.
// count: Pointer to the pixel-wise counter fot filtering.
@@ -804,7 +808,7 @@
const int num_planes, const int use_planewise_strategy, const int strength,
const int use_subblock, const int *subblock_filter_weights,
const double *noise_levels, const int block_mse, const int *subblock_mses,
- const uint8_t *pred, uint32_t *accum, uint16_t *count) {
+ const int q_factor, const uint8_t *pred, uint32_t *accum, uint16_t *count) {
assert(num_planes >= 1 && num_planes <= MAX_MB_PLANE);
if (use_planewise_strategy) { // Commonly used for high-resolution video.
@@ -812,13 +816,13 @@
if (is_frame_high_bitdepth(frame_to_filter)) {
av1_apply_temporal_filter_planewise_c(
frame_to_filter, mbd, block_size, mb_row, mb_col, num_planes,
- noise_levels, use_subblock, block_mse, subblock_mses, pred, accum,
- count);
+ noise_levels, use_subblock, block_mse, subblock_mses, q_factor, pred,
+ accum, count);
} else {
- av1_apply_temporal_filter_planewise(frame_to_filter, mbd, block_size,
- mb_row, mb_col, num_planes,
- noise_levels, use_subblock, block_mse,
- subblock_mses, pred, accum, count);
+ av1_apply_temporal_filter_planewise(
+ frame_to_filter, mbd, block_size, mb_row, mb_col, num_planes,
+ noise_levels, use_subblock, block_mse, subblock_mses, q_factor, pred,
+ accum, count);
}
} else { // Commonly used for low-resolution video.
if (subblock_filter_weights[0] == 0 && subblock_filter_weights[1] == 0 &&
@@ -1026,11 +1030,17 @@
subblock_filter_weights[0], pred,
accum, count);
} else { // Other reference frames.
+ const FRAME_TYPE frame_type =
+ (cpi->common.current_frame.frame_number > 1) ? INTER_FRAME
+ : KEY_FRAME;
+ const int q_factor =
+ (int)av1_convert_qindex_to_q(cpi->rc.avg_frame_qindex[frame_type],
+ cpi->common.seq_params.bit_depth);
av1_apply_temporal_filter_others(
frame_to_filter, mbd, block_size, mb_row, mb_col, num_planes,
use_planewise_strategy, strength, use_subblock,
subblock_filter_weights, noise_levels, block_mse, subblock_mses,
- pred, accum, count);
+ q_factor, pred, accum, count);
}
}
diff --git a/av1/encoder/x86/temporal_filter_avx2.c b/av1/encoder/x86/temporal_filter_avx2.c
index 07e14f7..a11f791 100644
--- a/av1/encoder/x86/temporal_filter_avx2.c
+++ b/av1/encoder/x86/temporal_filter_avx2.c
@@ -131,9 +131,9 @@
const uint8_t *frame1, const unsigned int stride, const uint8_t *frame2,
const unsigned int stride2, const int block_width, const int block_height,
const double sigma, const int decay_control, const int use_subblock,
- const int block_mse, const int *subblock_mses, unsigned int *accumulator,
- uint16_t *count, uint16_t *luma_sq_error, uint16_t *chroma_sq_error,
- int plane, int ss_x_shift, int ss_y_shift) {
+ const int block_mse, const int *subblock_mses, const int q_factor,
+ unsigned int *accumulator, uint16_t *count, uint16_t *luma_sq_error,
+ uint16_t *chroma_sq_error, int plane, int ss_x_shift, int ss_y_shift) {
assert(TF_PLANEWISE_FILTER_WINDOW_LENGTH == 5);
assert(((block_width == 32) && (block_height == 32)) ||
((block_width == 16) && (block_height == 16)));
@@ -141,6 +141,7 @@
uint32_t acc_5x5_sse[BH][BW];
const double h = decay_control * (0.7 + log(sigma + 1.0));
+ const double q = AOMMIN((double)(q_factor * q_factor) / 256.0, 1);
uint16_t *frame_sse =
(plane == PLANE_TYPE_Y) ? luma_sq_error : chroma_sq_error;
@@ -226,7 +227,7 @@
(double)(use_subblock ? subblock_mses[subblock_idx] : block_mse);
const double scaled_diff =
- AOMMAX(-(window_error + block_error / 10) / (2 * h * h), -15.0);
+ AOMMAX(-(window_error + block_error / 10) / (2 * h * h * q), -15.0);
const int adjusted_weight =
(int)(exp(scaled_diff) * TF_PLANEWISE_FILTER_WEIGHT_SCALE);
@@ -240,8 +241,8 @@
const YV12_BUFFER_CONFIG *ref_frame, const MACROBLOCKD *mbd,
const BLOCK_SIZE block_size, const int mb_row, const int mb_col,
const int num_planes, const double *noise_levels, const int use_subblock,
- const int block_mse, const int *subblock_mses, const uint8_t *pred,
- uint32_t *accum, uint16_t *count) {
+ const int block_mse, const int *subblock_mses, const int q_factor,
+ const uint8_t *pred, uint32_t *accum, uint16_t *count) {
const int is_high_bitdepth = ref_frame->flags & YV12_FLAG_HIGHBITDEPTH;
if (is_high_bitdepth) {
assert(0 && "Only support low bit-depth with avx2!");
@@ -275,8 +276,9 @@
apply_temporal_filter_planewise(
ref, frame_stride, pred + mb_pels * plane, plane_w, plane_w, plane_h,
noise_levels[plane], decay_control, use_subblock, block_mse,
- subblock_mses, accum + mb_pels * plane, count + mb_pels * plane,
- luma_sq_error, chroma_sq_error, plane, ss_x_shift, ss_y_shift);
+ subblock_mses, q_factor, accum + mb_pels * plane,
+ count + mb_pels * plane, luma_sq_error, chroma_sq_error, plane,
+ ss_x_shift, ss_y_shift);
}
if (chroma_sq_error != NULL) aom_free(chroma_sq_error);
}
diff --git a/av1/encoder/x86/temporal_filter_sse2.c b/av1/encoder/x86/temporal_filter_sse2.c
index 4fc8738..98a6b82 100644
--- a/av1/encoder/x86/temporal_filter_sse2.c
+++ b/av1/encoder/x86/temporal_filter_sse2.c
@@ -106,9 +106,9 @@
const uint8_t *frame1, const unsigned int stride, const uint8_t *frame2,
const unsigned int stride2, const int block_width, const int block_height,
const double sigma, const int decay_control, const int use_subblock,
- const int block_mse, const int *subblock_mses, unsigned int *accumulator,
- uint16_t *count, uint16_t *luma_sq_error, uint16_t *chroma_sq_error,
- int plane, int ss_x_shift, int ss_y_shift) {
+ const int block_mse, const int *subblock_mses, const int q_factor,
+ unsigned int *accumulator, uint16_t *count, uint16_t *luma_sq_error,
+ uint16_t *chroma_sq_error, int plane, int ss_x_shift, int ss_y_shift) {
assert(TF_PLANEWISE_FILTER_WINDOW_LENGTH == 5);
assert(((block_width == 32) && (block_height == 32)) ||
((block_width == 16) && (block_height == 16)));
@@ -116,6 +116,7 @@
uint32_t acc_5x5_sse[BH][BW];
const double h = decay_control * (0.7 + log(sigma + 1.0));
+ const double q = AOMMIN((double)(q_factor * q_factor) / 256.0, 1);
uint16_t *frame_sse =
(plane == PLANE_TYPE_Y) ? luma_sq_error : chroma_sq_error;
@@ -204,7 +205,7 @@
(double)(use_subblock ? subblock_mses[subblock_idx] : block_mse);
const double scaled_diff =
- AOMMAX(-(window_error + block_error / 10) / (2 * h * h), -15.0);
+ AOMMAX(-(window_error + block_error / 10) / (2 * h * h * q), -15.0);
const int adjusted_weight =
(int)(exp(scaled_diff) * TF_PLANEWISE_FILTER_WEIGHT_SCALE);
@@ -218,8 +219,8 @@
const YV12_BUFFER_CONFIG *ref_frame, const MACROBLOCKD *mbd,
const BLOCK_SIZE block_size, const int mb_row, const int mb_col,
const int num_planes, const double *noise_levels, const int use_subblock,
- const int block_mse, const int *subblock_mses, const uint8_t *pred,
- uint32_t *accum, uint16_t *count) {
+ const int block_mse, const int *subblock_mses, const int q_factor,
+ const uint8_t *pred, uint32_t *accum, uint16_t *count) {
const int is_high_bitdepth = ref_frame->flags & YV12_FLAG_HIGHBITDEPTH;
if (is_high_bitdepth) {
assert(0 && "Only support low bit-depth with sse2!");
@@ -253,8 +254,9 @@
apply_temporal_filter_planewise(
ref, frame_stride, pred + mb_pels * plane, plane_w, plane_w, plane_h,
noise_levels[plane], decay_control, use_subblock, block_mse,
- subblock_mses, accum + mb_pels * plane, count + mb_pels * plane,
- luma_sq_error, chroma_sq_error, plane, ss_x_shift, ss_y_shift);
+ subblock_mses, q_factor, accum + mb_pels * plane,
+ count + mb_pels * plane, luma_sq_error, chroma_sq_error, plane,
+ ss_x_shift, ss_y_shift);
}
if (chroma_sq_error != NULL) aom_free(chroma_sq_error);
}
diff --git a/test/temporal_filter_planewise_test.cc b/test/temporal_filter_planewise_test.cc
index b19ec29..c3f3e9e 100644
--- a/test/temporal_filter_planewise_test.cc
+++ b/test/temporal_filter_planewise_test.cc
@@ -41,8 +41,8 @@
const YV12_BUFFER_CONFIG *ref_frame, const MACROBLOCKD *mbd,
const BLOCK_SIZE block_size, const int mb_row, const int mb_col,
const int num_planes, const double *noise_level, const int use_subblock,
- const int block_mse, const int *subblock_mses, const uint8_t *pred,
- uint32_t *accum, uint16_t *count);
+ const int block_mse, const int *subblock_mses, const int q_factor,
+ const uint8_t *pred, uint32_t *accum, uint16_t *count);
typedef libaom_test::FuncParam<TemporalFilterPlanewiseFunc>
TemporalFilterPlanewiseFuncParam;
@@ -126,8 +126,9 @@
assert(width == 32 && height == 32);
const BLOCK_SIZE block_size = BLOCK_32X32;
const int use_subblock = 0;
- const int block_mse = 0;
- const int subblock_mses[4] = { 0, 0, 0, 0 };
+ const int block_mse = 20;
+ const int subblock_mses[4] = { 15, 16, 17, 18 };
+ const int q_factor = 12;
const int mb_row = 0;
const int mb_col = 0;
const int num_planes = 1;
@@ -147,18 +148,18 @@
mbd->bd = 8;
params_.ref_func(ref_frame, mbd, block_size, mb_row, mb_col, num_planes,
- sigma, use_subblock, block_mse, subblock_mses, src2_,
- accumulator_ref, count_ref);
+ sigma, use_subblock, block_mse, subblock_mses, q_factor,
+ src2_, accumulator_ref, count_ref);
params_.tst_func(ref_frame, mbd, block_size, mb_row, mb_col, num_planes,
- sigma, use_subblock, block_mse, subblock_mses, src2_,
- accumulator_mod, count_mod);
+ sigma, use_subblock, block_mse, subblock_mses, q_factor,
+ src2_, accumulator_mod, count_mod);
if (run_times > 1) {
aom_usec_timer_start(&ref_timer);
for (int j = 0; j < run_times; j++) {
params_.ref_func(ref_frame, mbd, block_size, mb_row, mb_col, num_planes,
- sigma, use_subblock, block_mse, subblock_mses, src2_,
- accumulator_ref, count_ref);
+ sigma, use_subblock, block_mse, subblock_mses,
+ q_factor, src2_, accumulator_ref, count_ref);
}
aom_usec_timer_mark(&ref_timer);
const int elapsed_time_c =
@@ -167,8 +168,8 @@
aom_usec_timer_start(&test_timer);
for (int j = 0; j < run_times; j++) {
params_.tst_func(ref_frame, mbd, block_size, mb_row, mb_col, num_planes,
- sigma, use_subblock, block_mse, subblock_mses, src2_,
- accumulator_mod, count_mod);
+ sigma, use_subblock, block_mse, subblock_mses,
+ q_factor, src2_, accumulator_mod, count_mod);
}
aom_usec_timer_mark(&test_timer);
const int elapsed_time_simd =