Use the short filter in subpel motion search
While using accurate subpel search (sf->use_accurate_subpel_search=1), we
use an 8-tap interpolation filter, which brings high computational complexity.
This patch replaces it with a 4-tap filter, and enables it as a speed 1
feature. The borg test results showed
negligible coding performance change.
avg_psnr ovr_psnr ssim
hdres set: -0.010 -0.090 -0.001
midres set: -0.009 -0.015 0.015
lowres set: 0.021 0.046 0.005
SIMD optimizations of the 4-tap filters will be added later to give
encoder speedups. Also, the filter can be redesigned for this purpose.
STATS_CHANGED
Change-Id: I296c340a3c4977f43a623a9e7c826f6ea2bf18b8
diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl
index e059652..2639e39 100755
--- a/aom_dsp/aom_dsp_rtcd_defs.pl
+++ b/aom_dsp/aom_dsp_rtcd_defs.pl
@@ -888,36 +888,37 @@
#
add_proto qw/void aom_upsampled_pred/, "MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
const MV *const mv, uint8_t *comp_pred, int width, int height, int subpel_x_q3,
- int subpel_y_q3, const uint8_t *ref, int ref_stride";
+ int subpel_y_q3, const uint8_t *ref, int ref_stride, int subpel_search";
specialize qw/aom_upsampled_pred sse2/;
add_proto qw/void aom_comp_avg_upsampled_pred/, "MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
const MV *const mv, uint8_t *comp_pred, const uint8_t *pred, int width,
int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref,
- int ref_stride";
+ int ref_stride, int subpel_search";
specialize qw/aom_comp_avg_upsampled_pred sse2/;
add_proto qw/void aom_jnt_comp_avg_upsampled_pred/, "MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
const MV *const mv, uint8_t *comp_pred, const uint8_t *pred, int width,
int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref,
- int ref_stride, const JNT_COMP_PARAMS *jcp_param";
+ int ref_stride, const JNT_COMP_PARAMS *jcp_param, int subpel_search";
specialize qw/aom_jnt_comp_avg_upsampled_pred ssse3/;
add_proto qw/void aom_highbd_upsampled_pred/, "MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
const MV *const mv, uint16_t *comp_pred, int width, int height, int subpel_x_q3,
- int subpel_y_q3, const uint8_t *ref8, int ref_stride, int bd";
+ int subpel_y_q3, const uint8_t *ref8, int ref_stride, int bd, int subpel_search";
specialize qw/aom_highbd_upsampled_pred sse2/;
add_proto qw/void aom_highbd_comp_avg_upsampled_pred/, "MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
const MV *const mv, uint16_t *comp_pred, const uint8_t *pred8, int width,
- int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8, int ref_stride, int bd";
+ int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8, int ref_stride,
+ int bd, int subpel_search";
specialize qw/aom_highbd_comp_avg_upsampled_pred sse2/;
add_proto qw/void aom_highbd_jnt_comp_avg_upsampled_pred/, "MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
const MV *const mv, uint16_t *comp_pred, const uint8_t *pred8, int width,
int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8,
- int ref_stride, int bd, const JNT_COMP_PARAMS *jcp_param";
+ int ref_stride, int bd, const JNT_COMP_PARAMS *jcp_param, int subpel_search";
specialize qw/aom_highbd_jnt_comp_avg_upsampled_pred sse2/;
diff --git a/aom_dsp/variance.c b/aom_dsp/variance.c
index 817ebe1..d567d45 100644
--- a/aom_dsp/variance.c
+++ b/aom_dsp/variance.c
@@ -302,7 +302,7 @@
int mi_row, int mi_col, const MV *const mv,
uint8_t *comp_pred, int width, int height,
int subpel_x_q3, int subpel_y_q3, const uint8_t *ref,
- int ref_stride) {
+ int ref_stride, int subpel_search) {
// expect xd == NULL only in tests
if (xd != NULL) {
const MB_MODE_INFO *mi = xd->mi[0];
@@ -387,7 +387,9 @@
}
const InterpFilterParams *filter =
- av1_get_interp_filter_params_with_block_size(EIGHTTAP_REGULAR, 8);
+ (subpel_search == 1)
+ ? av1_get_4tap_interp_filter_params(EIGHTTAP_REGULAR)
+ : av1_get_interp_filter_params_with_block_size(EIGHTTAP_REGULAR, 8);
if (!subpel_x_q3 && !subpel_y_q3) {
for (int i = 0; i < height; i++) {
@@ -429,11 +431,11 @@
uint8_t *comp_pred, const uint8_t *pred,
int width, int height, int subpel_x_q3,
int subpel_y_q3, const uint8_t *ref,
- int ref_stride) {
+ int ref_stride, int subpel_search) {
int i, j;
aom_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred, width, height,
- subpel_x_q3, subpel_y_q3, ref, ref_stride);
+ subpel_x_q3, subpel_y_q3, ref, ref_stride, subpel_search);
for (i = 0; i < height; i++) {
for (j = 0; j < width; j++) {
comp_pred[j] = ROUND_POWER_OF_TWO(comp_pred[j] + pred[j], 1);
@@ -466,13 +468,13 @@
MACROBLOCKD *xd, const AV1_COMMON *const cm, int mi_row, int mi_col,
const MV *const mv, uint8_t *comp_pred, const uint8_t *pred, int width,
int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref,
- int ref_stride, const JNT_COMP_PARAMS *jcp_param) {
+ int ref_stride, const JNT_COMP_PARAMS *jcp_param, int subpel_search) {
int i, j;
const int fwd_offset = jcp_param->fwd_offset;
const int bck_offset = jcp_param->bck_offset;
aom_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred, width, height,
- subpel_x_q3, subpel_y_q3, ref, ref_stride);
+ subpel_x_q3, subpel_y_q3, ref, ref_stride, subpel_search);
for (i = 0; i < height; i++) {
for (j = 0; j < width; j++) {
@@ -889,7 +891,8 @@
int mi_col, const MV *const mv,
uint16_t *comp_pred, int width, int height,
int subpel_x_q3, int subpel_y_q3,
- const uint8_t *ref8, int ref_stride, int bd) {
+ const uint8_t *ref8, int ref_stride, int bd,
+ int subpel_search) {
// expect xd == NULL only in tests
if (xd != NULL) {
const MB_MODE_INFO *mi = xd->mi[0];
@@ -975,7 +978,9 @@
}
const InterpFilterParams *filter =
- av1_get_interp_filter_params_with_block_size(EIGHTTAP_REGULAR, 8);
+ (subpel_search == 1)
+ ? av1_get_4tap_interp_filter_params(EIGHTTAP_REGULAR)
+ : av1_get_interp_filter_params_with_block_size(EIGHTTAP_REGULAR, 8);
if (!subpel_x_q3 && !subpel_y_q3) {
const uint16_t *ref;
@@ -1021,13 +1026,13 @@
MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
const MV *const mv, uint16_t *comp_pred, const uint8_t *pred8, int width,
int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8,
- int ref_stride, int bd) {
+ int ref_stride, int bd, int subpel_search) {
int i, j;
const uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred, width,
height, subpel_x_q3, subpel_y_q3, ref8, ref_stride,
- bd);
+ bd, subpel_search);
for (i = 0; i < height; ++i) {
for (j = 0; j < width; ++j) {
comp_pred[j] = ROUND_POWER_OF_TWO(pred[j] + comp_pred[j], 1);
@@ -1063,7 +1068,8 @@
MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
const MV *const mv, uint16_t *comp_pred, const uint8_t *pred8, int width,
int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8,
- int ref_stride, int bd, const JNT_COMP_PARAMS *jcp_param) {
+ int ref_stride, int bd, const JNT_COMP_PARAMS *jcp_param,
+ int subpel_search) {
int i, j;
const int fwd_offset = jcp_param->fwd_offset;
const int bck_offset = jcp_param->bck_offset;
@@ -1071,7 +1077,7 @@
aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred, width,
height, subpel_x_q3, subpel_y_q3, ref8, ref_stride,
- bd);
+ bd, subpel_search);
for (i = 0; i < height; i++) {
for (j = 0; j < width; j++) {
@@ -1110,10 +1116,12 @@
int width, int height, int subpel_x_q3,
int subpel_y_q3, const uint8_t *ref,
int ref_stride, const uint8_t *mask,
- int mask_stride, int invert_mask) {
+ int mask_stride, int invert_mask,
+ int subpel_search) {
if (subpel_x_q3 | subpel_y_q3) {
aom_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred, width, height,
- subpel_x_q3, subpel_y_q3, ref, ref_stride);
+ subpel_x_q3, subpel_y_q3, ref, ref_stride,
+ subpel_search);
ref = comp_pred;
ref_stride = width;
}
@@ -1190,10 +1198,10 @@
const MV *const mv, uint16_t *comp_pred, const uint8_t *pred8, int width,
int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8,
int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask,
- int bd) {
+ int bd, int subpel_search) {
aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred, width,
height, subpel_x_q3, subpel_y_q3, ref8, ref_stride,
- bd);
+ bd, subpel_search);
aom_highbd_comp_mask_pred(comp_pred, pred8, width, height,
CONVERT_TO_BYTEPTR(comp_pred), width, mask,
mask_stride, invert_mask);
diff --git a/aom_dsp/variance.h b/aom_dsp/variance.h
index b954470..a3e74b9 100644
--- a/aom_dsp/variance.h
+++ b/aom_dsp/variance.h
@@ -74,14 +74,15 @@
MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
const MV *const mv, uint8_t *comp_pred, const uint8_t *pred, int width,
int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref,
- int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask);
+ int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask,
+ int subpel_search);
void aom_highbd_comp_mask_upsampled_pred(
MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
const MV *const mv, uint16_t *comp_pred, const uint8_t *pred8, int width,
int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8,
int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask,
- int bd);
+ int bd, int subpel_search);
typedef unsigned int (*aom_obmc_sad_fn_t)(const uint8_t *pred, int pred_stride,
const int32_t *wsrc,
diff --git a/aom_dsp/x86/highbd_variance_sse2.c b/aom_dsp/x86/highbd_variance_sse2.c
index e9b5e73..8e37811 100644
--- a/aom_dsp/x86/highbd_variance_sse2.c
+++ b/aom_dsp/x86/highbd_variance_sse2.c
@@ -595,8 +595,8 @@
int mi_row, int mi_col, const MV *const mv,
uint16_t *comp_pred, int width, int height,
int subpel_x_q3, int subpel_y_q3,
- const uint8_t *ref8, int ref_stride,
- int bd) {
+ const uint8_t *ref8, int ref_stride, int bd,
+ int subpel_search) {
// expect xd == NULL only in tests
if (xd != NULL) {
const MB_MODE_INFO *mi = xd->mi[0];
@@ -680,7 +680,9 @@
}
const InterpFilterParams *filter =
- av1_get_interp_filter_params_with_block_size(EIGHTTAP_REGULAR, 8);
+ (subpel_search == 1)
+ ? av1_get_4tap_interp_filter_params(EIGHTTAP_REGULAR)
+ : av1_get_interp_filter_params_with_block_size(EIGHTTAP_REGULAR, 8);
if (!subpel_x_q3 && !subpel_y_q3) {
uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
@@ -746,13 +748,13 @@
MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
const MV *const mv, uint16_t *comp_pred, const uint8_t *pred8, int width,
int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8,
- int ref_stride, int bd) {
+ int ref_stride, int bd, int subpel_search) {
uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
int n;
int i;
aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred, width,
height, subpel_x_q3, subpel_y_q3, ref8, ref_stride,
- bd);
+ bd, subpel_search);
/*The total number of pixels must be a multiple of 8 (e.g., 4x4).*/
assert(!(width * height & 7));
n = width * height >> 3;
@@ -835,13 +837,14 @@
MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
const MV *const mv, uint16_t *comp_pred, const uint8_t *pred8, int width,
int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8,
- int ref_stride, int bd, const JNT_COMP_PARAMS *jcp_param) {
+ int ref_stride, int bd, const JNT_COMP_PARAMS *jcp_param,
+ int subpel_search) {
uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
int n;
int i;
aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred, width,
height, subpel_x_q3, subpel_y_q3, ref8, ref_stride,
- bd);
+ bd, subpel_search);
assert(!(width * height & 7));
n = width * height >> 3;
diff --git a/aom_dsp/x86/jnt_variance_ssse3.c b/aom_dsp/x86/jnt_variance_ssse3.c
index eaf1f34..f9a41a2 100644
--- a/aom_dsp/x86/jnt_variance_ssse3.c
+++ b/aom_dsp/x86/jnt_variance_ssse3.c
@@ -120,11 +120,11 @@
MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
const MV *const mv, uint8_t *comp_pred, const uint8_t *pred, int width,
int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref,
- int ref_stride, const JNT_COMP_PARAMS *jcp_param) {
+ int ref_stride, const JNT_COMP_PARAMS *jcp_param, int subpel_search) {
int n;
int i;
aom_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred, width, height,
- subpel_x_q3, subpel_y_q3, ref, ref_stride);
+ subpel_x_q3, subpel_y_q3, ref, ref_stride, subpel_search);
/*The total number of pixels must be a multiple of 16 (e.g., 4x4).*/
assert(!(width * height & 15));
n = width * height >> 4;
diff --git a/aom_dsp/x86/variance_sse2.c b/aom_dsp/x86/variance_sse2.c
index 9efddb9..ae6fb34 100644
--- a/aom_dsp/x86/variance_sse2.c
+++ b/aom_dsp/x86/variance_sse2.c
@@ -486,7 +486,8 @@
int mi_row, int mi_col, const MV *const mv,
uint8_t *comp_pred, int width, int height,
int subpel_x_q3, int subpel_y_q3,
- const uint8_t *ref, int ref_stride) {
+ const uint8_t *ref, int ref_stride,
+ int subpel_search) {
// expect xd == NULL only in tests
if (xd != NULL) {
const MB_MODE_INFO *mi = xd->mi[0];
@@ -571,7 +572,9 @@
}
const InterpFilterParams *filter =
- av1_get_interp_filter_params_with_block_size(EIGHTTAP_REGULAR, 8);
+ (subpel_search == 1)
+ ? av1_get_4tap_interp_filter_params(EIGHTTAP_REGULAR)
+ : av1_get_interp_filter_params_with_block_size(EIGHTTAP_REGULAR, 8);
if (!subpel_x_q3 && !subpel_y_q3) {
if (width >= 16) {
@@ -649,11 +652,11 @@
MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
const MV *const mv, uint8_t *comp_pred, const uint8_t *pred, int width,
int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref,
- int ref_stride) {
+ int ref_stride, int subpel_search) {
int n;
int i;
aom_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred, width, height,
- subpel_x_q3, subpel_y_q3, ref, ref_stride);
+ subpel_x_q3, subpel_y_q3, ref, ref_stride, subpel_search);
/*The total number of pixels must be a multiple of 16 (e.g., 4x4).*/
assert(!(width * height & 15));
n = width * height >> 4;
diff --git a/av1/common/filter.h b/av1/common/filter.h
index 7f8ad58..48bc49a 100644
--- a/av1/common/filter.h
+++ b/av1/common/filter.h
@@ -181,6 +181,11 @@
return &av1_interp_filter_params_list[interp_filter];
}
+static INLINE const InterpFilterParams *av1_get_4tap_interp_filter_params(
+ const InterpFilter interp_filter) {
+ return &av1_interp_4tap[interp_filter];
+}
+
static INLINE const int16_t *av1_get_interp_filter_kernel(
const InterpFilter interp_filter) {
return av1_interp_filter_params_list[interp_filter].filter_ptr;
diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c
index ee58802..ba66bae 100644
--- a/av1/encoder/mcomp.c
+++ b/av1/encoder/mcomp.c
@@ -220,7 +220,7 @@
thismse = upsampled_pref_error( \
xd, cm, mi_row, mi_col, &this_mv, vfp, src_address, src_stride, \
pre(y, y_stride, r, c), y_stride, sp(c), sp(r), second_pred, mask, \
- mask_stride, invert_mask, w, h, &sse); \
+ mask_stride, invert_mask, w, h, &sse, use_accurate_subpel_search); \
v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); \
v += thismse; \
if (v < besterr) { \
@@ -649,7 +649,7 @@
int subpel_x_q3, int subpel_y_q3,
const uint8_t *second_pred, const uint8_t *mask,
int mask_stride, int invert_mask, int w, int h,
- unsigned int *sse) {
+ unsigned int *sse, int subpel_search) {
unsigned int besterr;
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]);
@@ -657,20 +657,23 @@
if (mask) {
aom_highbd_comp_mask_upsampled_pred(
xd, cm, mi_row, mi_col, mv, pred16, second_pred, w, h, subpel_x_q3,
- subpel_y_q3, y, y_stride, mask, mask_stride, invert_mask, xd->bd);
+ subpel_y_q3, y, y_stride, mask, mask_stride, invert_mask, xd->bd,
+ subpel_search);
} else {
if (xd->jcp_param.use_jnt_comp_avg)
aom_highbd_jnt_comp_avg_upsampled_pred(
xd, cm, mi_row, mi_col, mv, pred16, second_pred, w, h,
- subpel_x_q3, subpel_y_q3, y, y_stride, xd->bd, &xd->jcp_param);
+ subpel_x_q3, subpel_y_q3, y, y_stride, xd->bd, &xd->jcp_param,
+ subpel_search);
else
- aom_highbd_comp_avg_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred16,
- second_pred, w, h, subpel_x_q3,
- subpel_y_q3, y, y_stride, xd->bd);
+ aom_highbd_comp_avg_upsampled_pred(
+ xd, cm, mi_row, mi_col, mv, pred16, second_pred, w, h,
+ subpel_x_q3, subpel_y_q3, y, y_stride, xd->bd, subpel_search);
}
} else {
aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred16, w, h,
- subpel_x_q3, subpel_y_q3, y, y_stride, xd->bd);
+ subpel_x_q3, subpel_y_q3, y, y_stride, xd->bd,
+ subpel_search);
}
besterr = vfp->vf(CONVERT_TO_BYTEPTR(pred16), w, src, src_stride, sse);
@@ -678,22 +681,23 @@
DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
if (second_pred != NULL) {
if (mask) {
- aom_comp_mask_upsampled_pred(
- xd, cm, mi_row, mi_col, mv, pred, second_pred, w, h, subpel_x_q3,
- subpel_y_q3, y, y_stride, mask, mask_stride, invert_mask);
+ aom_comp_mask_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred,
+ second_pred, w, h, subpel_x_q3,
+ subpel_y_q3, y, y_stride, mask,
+ mask_stride, invert_mask, subpel_search);
} else {
if (xd->jcp_param.use_jnt_comp_avg)
aom_jnt_comp_avg_upsampled_pred(
xd, cm, mi_row, mi_col, mv, pred, second_pred, w, h, subpel_x_q3,
- subpel_y_q3, y, y_stride, &xd->jcp_param);
+ subpel_y_q3, y, y_stride, &xd->jcp_param, subpel_search);
else
aom_comp_avg_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred,
second_pred, w, h, subpel_x_q3,
- subpel_y_q3, y, y_stride);
+ subpel_y_q3, y, y_stride, subpel_search);
}
} else {
aom_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred, w, h, subpel_x_q3,
- subpel_y_q3, y, y_stride);
+ subpel_y_q3, y, y_stride, subpel_search);
}
besterr = vfp->vf(pred, w, src, src_stride, sse);
@@ -708,10 +712,11 @@
const int src_stride, const uint8_t *const y, int y_stride,
const uint8_t *second_pred, const uint8_t *mask, int mask_stride,
int invert_mask, int w, int h, int offset, int *mvjcost, int *mvcost[2],
- unsigned int *sse1, int *distortion) {
- unsigned int besterr = upsampled_pref_error(
- xd, cm, mi_row, mi_col, bestmv, vfp, src, src_stride, y + offset,
- y_stride, 0, 0, second_pred, mask, mask_stride, invert_mask, w, h, sse1);
+ unsigned int *sse1, int *distortion, int subpel_search) {
+ unsigned int besterr =
+ upsampled_pref_error(xd, cm, mi_row, mi_col, bestmv, vfp, src, src_stride,
+ y + offset, y_stride, 0, 0, second_pred, mask,
+ mask_stride, invert_mask, w, h, sse1, subpel_search);
*distortion = besterr;
besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
return besterr;
@@ -782,7 +787,8 @@
besterr = upsampled_setup_center_error(
xd, cm, mi_row, mi_col, bestmv, ref_mv, error_per_bit, vfp, src_address,
src_stride, y, y_stride, second_pred, mask, mask_stride, invert_mask, w,
- h, offset, mvjcost, mvcost, sse1, distortion);
+ h, offset, mvjcost, mvcost, sse1, distortion,
+ use_accurate_subpel_search);
else
besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
src_address, src_stride, y, y_stride,
@@ -803,7 +809,8 @@
thismse = upsampled_pref_error(
xd, cm, mi_row, mi_col, &this_mv, vfp, src_address, src_stride,
pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), second_pred,
- mask, mask_stride, invert_mask, w, h, &sse);
+ mask, mask_stride, invert_mask, w, h, &sse,
+ use_accurate_subpel_search);
} else {
thismse = estimate_upsampled_pref_error(
xd, vfp, src_address, src_stride, pre(y, y_stride, tr, tc),
@@ -838,7 +845,8 @@
thismse = upsampled_pref_error(
xd, cm, mi_row, mi_col, &this_mv, vfp, src_address, src_stride,
pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), second_pred,
- mask, mask_stride, invert_mask, w, h, &sse);
+ mask, mask_stride, invert_mask, w, h, &sse,
+ use_accurate_subpel_search);
} else {
thismse = estimate_upsampled_pref_error(
xd, vfp, src_address, src_stride, pre(y, y_stride, tr, tc),
@@ -2304,7 +2312,8 @@
MV this_mv = { r, c }; \
thismse = upsampled_obmc_pref_error(xd, cm, mi_row, mi_col, &this_mv, \
mask, vfp, z, pre(y, y_stride, r, c), \
- y_stride, sp(c), sp(r), w, h, &sse); \
+ y_stride, sp(c), sp(r), w, h, &sse, \
+ use_accurate_subpel_search); \
if ((v = MVC(r, c) + thismse) < besterr) { \
besterr = v; \
br = r; \
@@ -2332,18 +2341,20 @@
MACROBLOCKD *xd, const AV1_COMMON *const cm, int mi_row, int mi_col,
const MV *const mv, const int32_t *mask, const aom_variance_fn_ptr_t *vfp,
const int32_t *const wsrc, const uint8_t *const y, int y_stride,
- int subpel_x_q3, int subpel_y_q3, int w, int h, unsigned int *sse) {
+ int subpel_x_q3, int subpel_y_q3, int w, int h, unsigned int *sse,
+ int subpel_search) {
unsigned int besterr;
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]);
aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred16, w, h,
- subpel_x_q3, subpel_y_q3, y, y_stride, xd->bd);
+ subpel_x_q3, subpel_y_q3, y, y_stride, xd->bd,
+ subpel_search);
besterr = vfp->ovf(CONVERT_TO_BYTEPTR(pred16), w, wsrc, mask, sse);
} else {
DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
aom_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred, w, h, subpel_x_q3,
- subpel_y_q3, y, y_stride);
+ subpel_y_q3, y, y_stride, subpel_search);
besterr = vfp->ovf(pred, w, wsrc, mask, sse);
}
@@ -2355,10 +2366,11 @@
const int32_t *mask, const MV *bestmv, const MV *ref_mv, int error_per_bit,
const aom_variance_fn_ptr_t *vfp, const int32_t *const wsrc,
const uint8_t *const y, int y_stride, int w, int h, int offset,
- int *mvjcost, int *mvcost[2], unsigned int *sse1, int *distortion) {
- unsigned int besterr =
- upsampled_obmc_pref_error(xd, cm, mi_row, mi_col, bestmv, mask, vfp, wsrc,
- y + offset, y_stride, 0, 0, w, h, sse1);
+ int *mvjcost, int *mvcost[2], unsigned int *sse1, int *distortion,
+ int subpel_search) {
+ unsigned int besterr = upsampled_obmc_pref_error(
+ xd, cm, mi_row, mi_col, bestmv, mask, vfp, wsrc, y + offset, y_stride, 0,
+ 0, w, h, sse1, subpel_search);
*distortion = besterr;
besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
return besterr;
@@ -2413,11 +2425,12 @@
bestmv->row *= 8;
bestmv->col *= 8;
- // use_accurate_subpel_search can be 0 or 1
+ // use_accurate_subpel_search can be 0 or 1 or 2
if (use_accurate_subpel_search)
besterr = upsampled_setup_obmc_center_error(
xd, cm, mi_row, mi_col, mask, bestmv, ref_mv, error_per_bit, vfp, z, y,
- y_stride, w, h, offset, mvjcost, mvcost, sse1, distortion);
+ y_stride, w, h, offset, mvjcost, mvcost, sse1, distortion,
+ use_accurate_subpel_search);
else
besterr = setup_obmc_center_error(mask, bestmv, ref_mv, error_per_bit, vfp,
z, y, y_stride, offset, mvjcost, mvcost,
@@ -2433,7 +2446,8 @@
if (use_accurate_subpel_search) {
thismse = upsampled_obmc_pref_error(
xd, cm, mi_row, mi_col, &this_mv, mask, vfp, src_address,
- pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), w, h, &sse);
+ pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), w, h, &sse,
+ use_accurate_subpel_search);
} else {
thismse = vfp->osvf(pre(y, y_stride, tr, tc), y_stride, sp(tc),
sp(tr), src_address, mask, &sse);
@@ -2464,7 +2478,8 @@
if (use_accurate_subpel_search) {
thismse = upsampled_obmc_pref_error(
xd, cm, mi_row, mi_col, &this_mv, mask, vfp, src_address,
- pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), w, h, &sse);
+ pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), w, h, &sse,
+ use_accurate_subpel_search);
} else {
thismse = vfp->osvf(pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr),
src_address, mask, &sse);
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 19ef55c..68da931 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -200,6 +200,7 @@
sf->use_intra_txb_hash = 1;
sf->optimize_b_precheck = 1;
sf->dual_sgr_penalty_level = 1;
+ sf->use_accurate_subpel_search = 1;
}
if (speed >= 2) {
@@ -431,7 +432,7 @@
sf->disable_filter_search_var_thresh = 0;
sf->adaptive_interp_filter_search = 0;
sf->allow_partition_search_skip = 0;
- sf->use_accurate_subpel_search = 1;
+ sf->use_accurate_subpel_search = 2;
sf->disable_wedge_search_var_thresh = 0;
sf->fast_wedge_sign_estimate = 0;
sf->drop_ref = 0;
diff --git a/test/comp_avg_pred_test.h b/test/comp_avg_pred_test.h
index ab2004c..7028d22 100644
--- a/test/comp_avg_pred_test.h
+++ b/test/comp_avg_pred_test.h
@@ -36,7 +36,7 @@
MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
const MV *const mv, uint8_t *comp_pred, const uint8_t *pred, int width,
int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref,
- int ref_stride, const JNT_COMP_PARAMS *jcp_param);
+ int ref_stride, const JNT_COMP_PARAMS *jcp_param, int subpel_search);
typedef void (*highbdjntcompavg_func)(uint16_t *comp_pred, const uint8_t *pred8,
int width, int height,
@@ -47,7 +47,8 @@
MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
const MV *const mv, uint16_t *comp_pred, const uint8_t *pred8, int width,
int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8,
- int ref_stride, int bd, const JNT_COMP_PARAMS *jcp_param);
+ int ref_stride, int bd, const JNT_COMP_PARAMS *jcp_param,
+ int subpel_search);
typedef ::testing::tuple<jntcompavg_func, BLOCK_SIZE> JNTCOMPAVGParam;
@@ -217,33 +218,39 @@
JNT_COMP_PARAMS jnt_comp_params;
jnt_comp_params.use_jnt_comp_avg = 1;
int sub_x_q3, sub_y_q3;
- for (sub_x_q3 = 0; sub_x_q3 < 8; ++sub_x_q3) {
- for (sub_y_q3 = 0; sub_y_q3 < 8; ++sub_y_q3) {
- for (int ii = 0; ii < 2; ii++) {
- for (int jj = 0; jj < 4; jj++) {
- jnt_comp_params.fwd_offset = quant_dist_lookup_table[ii][jj][0];
- jnt_comp_params.bck_offset = quant_dist_lookup_table[ii][jj][1];
+ int subpel_search;
+ for (subpel_search = 1; subpel_search <= 2; ++subpel_search) {
+ for (sub_x_q3 = 0; sub_x_q3 < 8; ++sub_x_q3) {
+ for (sub_y_q3 = 0; sub_y_q3 < 8; ++sub_y_q3) {
+ for (int ii = 0; ii < 2; ii++) {
+ for (int jj = 0; jj < 4; jj++) {
+ jnt_comp_params.fwd_offset = quant_dist_lookup_table[ii][jj][0];
+ jnt_comp_params.bck_offset = quant_dist_lookup_table[ii][jj][1];
- const int offset_r = 3 + rnd_.PseudoUniform(h - in_h - 7);
- const int offset_c = 3 + rnd_.PseudoUniform(w - in_w - 7);
+ const int offset_r = 3 + rnd_.PseudoUniform(h - in_h - 7);
+ const int offset_c = 3 + rnd_.PseudoUniform(w - in_w - 7);
- aom_jnt_comp_avg_upsampled_pred_c(
- NULL, NULL, 0, 0, NULL, output, pred8 + offset_r * w + offset_c,
- in_w, in_h, sub_x_q3, sub_y_q3, ref8 + offset_r * w + offset_c,
- in_w, &jnt_comp_params);
- test_impl(NULL, NULL, 0, 0, NULL, output2,
- pred8 + offset_r * w + offset_c, in_w, in_h, sub_x_q3,
- sub_y_q3, ref8 + offset_r * w + offset_c, in_w,
- &jnt_comp_params);
+ aom_jnt_comp_avg_upsampled_pred_c(
+ NULL, NULL, 0, 0, NULL, output,
+ pred8 + offset_r * w + offset_c, in_w, in_h, sub_x_q3,
+ sub_y_q3, ref8 + offset_r * w + offset_c, in_w,
+ &jnt_comp_params, subpel_search);
+ test_impl(NULL, NULL, 0, 0, NULL, output2,
+ pred8 + offset_r * w + offset_c, in_w, in_h, sub_x_q3,
+ sub_y_q3, ref8 + offset_r * w + offset_c, in_w,
+ &jnt_comp_params, subpel_search);
- for (int i = 0; i < in_h; ++i) {
- for (int j = 0; j < in_w; ++j) {
- int idx = i * in_w + j;
- ASSERT_EQ(output[idx], output2[idx])
- << "Mismatch at unit tests for AV1JNTCOMPAVGUPSAMPLEDTest\n"
- << in_w << "x" << in_h << " Pixel mismatch at index " << idx
- << " = (" << i << ", " << j << "), sub pixel offset = ("
- << sub_y_q3 << ", " << sub_x_q3 << ")";
+ for (int i = 0; i < in_h; ++i) {
+ for (int j = 0; j < in_w; ++j) {
+ int idx = i * in_w + j;
+ ASSERT_EQ(output[idx], output2[idx])
+ << "Mismatch at unit tests for "
+ "AV1JNTCOMPAVGUPSAMPLEDTest\n"
+ << in_w << "x" << in_h << " Pixel mismatch at index "
+ << idx << " = (" << i << ", " << j
+ << "), sub pixel offset = (" << sub_y_q3 << ", "
+ << sub_x_q3 << ")";
+ }
}
}
}
@@ -280,11 +287,12 @@
const int num_loops = 1000000000 / (in_w + in_h);
aom_usec_timer timer;
aom_usec_timer_start(&timer);
+ int subpel_search = 2; // set to 1 to test 4-tap filter.
for (int i = 0; i < num_loops; ++i)
aom_jnt_comp_avg_upsampled_pred_c(NULL, NULL, 0, 0, NULL, output, pred8,
in_w, in_h, sub_x_q3, sub_y_q3, ref8,
- in_w, &jnt_comp_params);
+ in_w, &jnt_comp_params, subpel_search);
aom_usec_timer_mark(&timer);
const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
@@ -296,7 +304,7 @@
for (int i = 0; i < num_loops; ++i)
test_impl(NULL, NULL, 0, 0, NULL, output2, pred8, in_w, in_h, sub_x_q3,
- sub_y_q3, ref8, in_w, &jnt_comp_params);
+ sub_y_q3, ref8, in_w, &jnt_comp_params, subpel_search);
aom_usec_timer_mark(&timer1);
const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1));
@@ -445,38 +453,41 @@
JNT_COMP_PARAMS jnt_comp_params;
jnt_comp_params.use_jnt_comp_avg = 1;
int sub_x_q3, sub_y_q3;
+ int subpel_search;
+ for (subpel_search = 1; subpel_search <= 2; ++subpel_search) {
+ for (sub_x_q3 = 0; sub_x_q3 < 8; ++sub_x_q3) {
+ for (sub_y_q3 = 0; sub_y_q3 < 8; ++sub_y_q3) {
+ for (int ii = 0; ii < 2; ii++) {
+ for (int jj = 0; jj < 4; jj++) {
+ jnt_comp_params.fwd_offset = quant_dist_lookup_table[ii][jj][0];
+ jnt_comp_params.bck_offset = quant_dist_lookup_table[ii][jj][1];
- for (sub_x_q3 = 0; sub_x_q3 < 8; ++sub_x_q3) {
- for (sub_y_q3 = 0; sub_y_q3 < 8; ++sub_y_q3) {
- for (int ii = 0; ii < 2; ii++) {
- for (int jj = 0; jj < 4; jj++) {
- jnt_comp_params.fwd_offset = quant_dist_lookup_table[ii][jj][0];
- jnt_comp_params.bck_offset = quant_dist_lookup_table[ii][jj][1];
+ const int offset_r = 3 + rnd_.PseudoUniform(h - in_h - 7);
+ const int offset_c = 3 + rnd_.PseudoUniform(w - in_w - 7);
- const int offset_r = 3 + rnd_.PseudoUniform(h - in_h - 7);
- const int offset_c = 3 + rnd_.PseudoUniform(w - in_w - 7);
+ aom_highbd_jnt_comp_avg_upsampled_pred_c(
+ NULL, NULL, 0, 0, NULL, output,
+ CONVERT_TO_BYTEPTR(pred8) + offset_r * w + offset_c, in_w,
+ in_h, sub_x_q3, sub_y_q3,
+ CONVERT_TO_BYTEPTR(ref8) + offset_r * w + offset_c, in_w, bd,
+ &jnt_comp_params, subpel_search);
+ test_impl(NULL, NULL, 0, 0, NULL, output2,
+ CONVERT_TO_BYTEPTR(pred8) + offset_r * w + offset_c,
+ in_w, in_h, sub_x_q3, sub_y_q3,
+ CONVERT_TO_BYTEPTR(ref8) + offset_r * w + offset_c,
+ in_w, bd, &jnt_comp_params, subpel_search);
- aom_highbd_jnt_comp_avg_upsampled_pred_c(
- NULL, NULL, 0, 0, NULL, output,
- CONVERT_TO_BYTEPTR(pred8) + offset_r * w + offset_c, in_w, in_h,
- sub_x_q3, sub_y_q3,
- CONVERT_TO_BYTEPTR(ref8) + offset_r * w + offset_c, in_w, bd,
- &jnt_comp_params);
- test_impl(NULL, NULL, 0, 0, NULL, output2,
- CONVERT_TO_BYTEPTR(pred8) + offset_r * w + offset_c, in_w,
- in_h, sub_x_q3, sub_y_q3,
- CONVERT_TO_BYTEPTR(ref8) + offset_r * w + offset_c, in_w,
- bd, &jnt_comp_params);
-
- for (int i = 0; i < in_h; ++i) {
- for (int j = 0; j < in_w; ++j) {
- int idx = i * in_w + j;
- ASSERT_EQ(output[idx], output2[idx])
- << "Mismatch at unit tests for "
- "AV1HighBDJNTCOMPAVGUPSAMPLEDTest\n"
- << in_w << "x" << in_h << " Pixel mismatch at index " << idx
- << " = (" << i << ", " << j << "), sub pixel offset = ("
- << sub_y_q3 << ", " << sub_x_q3 << ")";
+ for (int i = 0; i < in_h; ++i) {
+ for (int j = 0; j < in_w; ++j) {
+ int idx = i * in_w + j;
+ ASSERT_EQ(output[idx], output2[idx])
+ << "Mismatch at unit tests for "
+ "AV1HighBDJNTCOMPAVGUPSAMPLEDTest\n"
+ << in_w << "x" << in_h << " Pixel mismatch at index "
+ << idx << " = (" << i << ", " << j
+ << "), sub pixel offset = (" << sub_y_q3 << ", "
+ << sub_x_q3 << ")";
+ }
}
}
}
@@ -511,12 +522,12 @@
const int num_loops = 1000000000 / (in_w + in_h);
aom_usec_timer timer;
aom_usec_timer_start(&timer);
-
+ int subpel_search = 2; // set to 1 to test 4-tap filter.
for (int i = 0; i < num_loops; ++i)
aom_highbd_jnt_comp_avg_upsampled_pred_c(
NULL, NULL, 0, 0, NULL, output, CONVERT_TO_BYTEPTR(pred8), in_w, in_h,
sub_x_q3, sub_y_q3, CONVERT_TO_BYTEPTR(ref8), in_w, bd,
- &jnt_comp_params);
+ &jnt_comp_params, subpel_search);
aom_usec_timer_mark(&timer);
const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
@@ -529,7 +540,7 @@
for (int i = 0; i < num_loops; ++i)
test_impl(NULL, NULL, 0, 0, NULL, output2, CONVERT_TO_BYTEPTR(pred8),
in_w, in_h, sub_x_q3, sub_y_q3, CONVERT_TO_BYTEPTR(ref8), in_w,
- bd, &jnt_comp_params);
+ bd, &jnt_comp_params, subpel_search);
aom_usec_timer_mark(&timer1);
const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1));
diff --git a/test/comp_mask_variance_test.cc b/test/comp_mask_variance_test.cc
index 2d842c9..b2ab496 100644
--- a/test/comp_mask_variance_test.cc
+++ b/test/comp_mask_variance_test.cc
@@ -190,26 +190,29 @@
const int w = block_size_wide[bsize];
const int h = block_size_high[bsize];
int wedge_types = (1 << get_wedge_bits_lookup(bsize));
+ int subpel_search;
+ for (subpel_search = 1; subpel_search <= 2; ++subpel_search) {
+ // Loop over all 64 (subx, suby) pairs; each component ranges over 0..7.
+ for (int sub = 0; sub < 8 * 8; ++sub) {
+ int subx = sub & 0x7;
+ int suby = (sub >> 3);
+ for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
+ const uint8_t *mask =
+ av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
- // loop through subx and suby
- for (int sub = 0; sub < 8 * 8; ++sub) {
- int subx = sub & 0x7;
- int suby = (sub >> 3);
- for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
- const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
+ aom_comp_mask_pred = aom_comp_mask_pred_c; // ref
+ aom_comp_mask_upsampled_pred(NULL, NULL, 0, 0, NULL, comp_pred1_, pred_,
+ w, h, subx, suby, ref_, MAX_SB_SIZE, mask,
+ w, inv, subpel_search);
- aom_comp_mask_pred = aom_comp_mask_pred_c; // ref
- aom_comp_mask_upsampled_pred(NULL, NULL, 0, 0, NULL, comp_pred1_, pred_,
- w, h, subx, suby, ref_, MAX_SB_SIZE, mask, w,
- inv);
-
- aom_comp_mask_pred = test_impl; // test
- aom_comp_mask_upsampled_pred(NULL, NULL, 0, 0, NULL, comp_pred2_, pred_,
- w, h, subx, suby, ref_, MAX_SB_SIZE, mask, w,
- inv);
- ASSERT_EQ(CheckResult(w, h), true)
- << " wedge " << wedge_index << " inv " << inv << "sub (" << subx
- << "," << suby << ")";
+ aom_comp_mask_pred = test_impl; // test
+ aom_comp_mask_upsampled_pred(NULL, NULL, 0, 0, NULL, comp_pred2_, pred_,
+ w, h, subx, suby, ref_, MAX_SB_SIZE, mask,
+ w, inv, subpel_search);
+ ASSERT_EQ(CheckResult(w, h), true)
+ << " wedge " << wedge_index << " inv " << inv << "sub (" << subx
+ << "," << suby << ")";
+ }
}
}
}
@@ -228,6 +231,7 @@
const int num_loops = 1000000000 / (w + h);
comp_mask_pred_func funcs[2] = { &aom_comp_mask_pred_c, test_impl };
double elapsed_time[2] = { 0 };
+ int subpel_search = 2; // set to 1 to test 4-tap filter.
for (int i = 0; i < 2; ++i) {
aom_usec_timer timer;
aom_usec_timer_start(&timer);
@@ -235,7 +239,7 @@
for (int j = 0; j < num_loops; ++j) {
aom_comp_mask_upsampled_pred(NULL, NULL, 0, 0, NULL, comp_pred1_, pred_,
w, h, subx, suby, ref_, MAX_SB_SIZE, mask, w,
- 0);
+ 0, subpel_search);
}
aom_usec_timer_mark(&timer);
double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
@@ -466,25 +470,31 @@
ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
}
- // loop through subx and suby
- for (int sub = 0; sub < 8 * 8; ++sub) {
- int subx = sub & 0x7;
- int suby = (sub >> 3);
- for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
- const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
+ int subpel_search;
+ for (subpel_search = 1; subpel_search <= 2; ++subpel_search) {
+ // Loop over all 64 (subx, suby) pairs; each component ranges over 0..7.
+ for (int sub = 0; sub < 8 * 8; ++sub) {
+ int subx = sub & 0x7;
+ int suby = (sub >> 3);
+ for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
+ const uint8_t *mask =
+ av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
- aom_highbd_comp_mask_pred = aom_highbd_comp_mask_pred_c; // ref
- aom_highbd_comp_mask_upsampled_pred(
- NULL, NULL, 0, 0, NULL, comp_pred1_, CONVERT_TO_BYTEPTR(pred_), w, h,
- subx, suby, CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, inv, bd_);
+ aom_highbd_comp_mask_pred = aom_highbd_comp_mask_pred_c; // ref
+ aom_highbd_comp_mask_upsampled_pred(
+ NULL, NULL, 0, 0, NULL, comp_pred1_, CONVERT_TO_BYTEPTR(pred_), w,
+ h, subx, suby, CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, inv,
+ bd_, subpel_search);
- aom_highbd_comp_mask_pred = test_impl; // test
- aom_highbd_comp_mask_upsampled_pred(
- NULL, NULL, 0, 0, NULL, comp_pred2_, CONVERT_TO_BYTEPTR(pred_), w, h,
- subx, suby, CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, inv, bd_);
- ASSERT_EQ(CheckResult(w, h), true)
- << " wedge " << wedge_index << " inv " << inv << "sub (" << subx
- << "," << suby << ")";
+ aom_highbd_comp_mask_pred = test_impl; // test
+ aom_highbd_comp_mask_upsampled_pred(
+ NULL, NULL, 0, 0, NULL, comp_pred2_, CONVERT_TO_BYTEPTR(pred_), w,
+ h, subx, suby, CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, inv,
+ bd_, subpel_search);
+ ASSERT_EQ(CheckResult(w, h), true)
+ << " wedge " << wedge_index << " inv " << inv << "sub (" << subx
+ << "," << suby << ")";
+ }
}
}
}
@@ -516,10 +526,12 @@
aom_usec_timer timer;
aom_usec_timer_start(&timer);
aom_highbd_comp_mask_pred = funcs[i];
+ int subpel_search = 2; // set to 1 to test 4-tap filter.
for (int j = 0; j < num_loops; ++j) {
aom_highbd_comp_mask_upsampled_pred(
NULL, NULL, 0, 0, NULL, comp_pred1_, CONVERT_TO_BYTEPTR(pred_), w, h,
- subx, suby, CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, 0, bd_);
+ subx, suby, CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, 0, bd_,
+ subpel_search);
}
aom_usec_timer_mark(&timer);
double time = static_cast<double>(aom_usec_timer_elapsed(&timer));