Add highbd avg_4x4, avg_8x8, minmax_8x8
Used in var based partition.
BUG=aomedia:2520
Change-Id: I6b90cf78d5585050600349912ff7293d8da7db40
diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl
index eedb925..9867602 100755
--- a/aom_dsp/aom_dsp_rtcd_defs.pl
+++ b/aom_dsp/aom_dsp_rtcd_defs.pl
@@ -883,6 +883,12 @@
add_proto qw/void aom_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
specialize qw/aom_minmax_8x8 sse2/;
+ if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
+ add_proto qw/unsigned int aom_highbd_avg_8x8/, "const uint8_t *, int p";
+ add_proto qw/unsigned int aom_highbd_avg_4x4/, "const uint8_t *, int p";
+ add_proto qw/void aom_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
+ }
+
add_proto qw/void aom_int_pro_row/, "int16_t *hbuf, const uint8_t *ref, const int ref_stride, const int height";
specialize qw/aom_int_pro_row sse2/;
diff --git a/aom_dsp/avg.c b/aom_dsp/avg.c
index 6cb92d5..5e8c5a3 100644
--- a/aom_dsp/avg.c
+++ b/aom_dsp/avg.c
@@ -48,6 +48,46 @@
return (sum + 32) >> 6;
}
+#if CONFIG_AV1_HIGHBITDEPTH
+// Returns the rounded average of an 8x8 block of 16-bit samples. s8 is
+// unwrapped via CONVERT_TO_SHORTPTR; p is the row stride in samples.
+unsigned int aom_highbd_avg_8x8_c(const uint8_t *s8, int p) {
+  const uint16_t *row = CONVERT_TO_SHORTPTR(s8);
+  int total = 0;
+  for (int r = 0; r < 8; ++r, row += p)
+    for (int c = 0; c < 8; ++c) total += row[c];
+  // +32 rounds to nearest before dividing the 64-sample sum by 64.
+  return (total + 32) >> 6;
+}
+
+// Returns the rounded average of a 4x4 block of 16-bit samples. s8 is
+// unwrapped via CONVERT_TO_SHORTPTR; p is the row stride in samples.
+unsigned int aom_highbd_avg_4x4_c(const uint8_t *s8, int p) {
+  const uint16_t *row = CONVERT_TO_SHORTPTR(s8);
+  int total = 0;
+  for (int r = 0; r < 4; ++r, row += p)
+    for (int c = 0; c < 4; ++c) total += row[c];
+  // +8 rounds to nearest before dividing the 16-sample sum by 16.
+  return (total + 8) >> 4;
+}
+
+// Writes to *min / *max the smallest and largest |s - d| over an 8x8 block.
+void aom_highbd_minmax_8x8_c(const uint8_t *s8, int p, const uint8_t *d8,
+                             int dp, int *min, int *max) {
+  const uint16_t *s = CONVERT_TO_SHORTPTR(s8);
+  const uint16_t *d = CONVERT_TO_SHORTPTR(d8);
+  *min = 65535;  // 16-bit ceiling: highbd diffs can exceed the 8-bit 255.
+  *max = 0;
+  for (int i = 0; i < 8; ++i, s += p, d += dp) {
+    for (int j = 0; j < 8; ++j) {
+      const int diff = abs(s[j] - d[j]);
+      if (diff < *min) *min = diff;
+      if (diff > *max) *max = diff;
+    }
+  }
+}
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
// src_diff: first pass, 9 bit, dynamic range [-255, 255]
// second pass, 12 bit, dynamic range [-2040, 2040]
static void hadamard_col8(const int16_t *src_diff, ptrdiff_t src_stride,
diff --git a/av1/encoder/var_based_part.c b/av1/encoder/var_based_part.c
index bff15eb..4f1bd9c 100644
--- a/av1/encoder/var_based_part.c
+++ b/av1/encoder/var_based_part.c
@@ -262,8 +262,12 @@
static AOM_INLINE void fill_variance_8x8avg(const uint8_t *s, int sp,
const uint8_t *d, int dp,
int x16_idx, int y16_idx,
- v16x16 *vst, int pixels_wide,
- int pixels_high, int is_key_frame) {
+ v16x16 *vst,
+#if CONFIG_AV1_HIGHBITDEPTH
+ int highbd_flag,
+#endif
+ int pixels_wide, int pixels_high,
+ int is_key_frame) {
int k;
for (k = 0; k < 4; k++) {
int x8_idx = x16_idx + ((k & 1) << 3);
@@ -273,9 +277,19 @@
if (x8_idx < pixels_wide && y8_idx < pixels_high) {
int s_avg;
int d_avg = 128;
+#if CONFIG_AV1_HIGHBITDEPTH
+ if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
+ s_avg = aom_highbd_avg_8x8(s + y8_idx * sp + x8_idx, sp);
+ if (!is_key_frame)
+ d_avg = aom_highbd_avg_8x8(d + y8_idx * dp + x8_idx, dp);
+ } else {
+ s_avg = aom_avg_8x8(s + y8_idx * sp + x8_idx, sp);
+ if (!is_key_frame) d_avg = aom_avg_8x8(d + y8_idx * dp + x8_idx, dp);
+ }
+#else
s_avg = aom_avg_8x8(s + y8_idx * sp + x8_idx, sp);
if (!is_key_frame) d_avg = aom_avg_8x8(d + y8_idx * dp + x8_idx, dp);
-
+#endif
sum = s_avg - d_avg;
sse = sum * sum;
}
@@ -284,8 +298,11 @@
}
static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d,
- int dp, int x16_idx, int y16_idx, int pixels_wide,
- int pixels_high) {
+ int dp, int x16_idx, int y16_idx,
+#if CONFIG_AV1_HIGHBITDEPTH
+ int highbd_flag,
+#endif
+ int pixels_wide, int pixels_high) {
int k;
int minmax_max = 0;
int minmax_min = 255;
@@ -296,8 +313,18 @@
int min = 0;
int max = 0;
if (x8_idx < pixels_wide && y8_idx < pixels_high) {
+#if CONFIG_AV1_HIGHBITDEPTH
+ if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
+ aom_highbd_minmax_8x8(s + y8_idx * sp + x8_idx, sp,
+ d + y8_idx * dp + x8_idx, dp, &min, &max);
+ } else {
+ aom_minmax_8x8(s + y8_idx * sp + x8_idx, sp, d + y8_idx * dp + x8_idx,
+ dp, &min, &max);
+ }
+#else
aom_minmax_8x8(s + y8_idx * sp + x8_idx, sp, d + y8_idx * dp + x8_idx, dp,
&min, &max);
+#endif
if ((max - min) > minmax_max) minmax_max = (max - min);
if ((max - min) < minmax_min) minmax_min = (max - min);
}
@@ -308,6 +335,9 @@
static AOM_INLINE void fill_variance_4x4avg(const uint8_t *s, int sp,
const uint8_t *d, int dp,
int x8_idx, int y8_idx, v8x8 *vst,
+#if CONFIG_AV1_HIGHBITDEPTH
+ int highbd_flag,
+#endif
int pixels_wide, int pixels_high,
int is_key_frame) {
int k;
@@ -319,8 +349,20 @@
if (x4_idx < pixels_wide && y4_idx < pixels_high) {
int s_avg;
int d_avg = 128;
+#if CONFIG_AV1_HIGHBITDEPTH
+ if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
+ s_avg = aom_highbd_avg_4x4(s + y4_idx * sp + x4_idx, sp);
+ if (!is_key_frame)
+ d_avg = aom_highbd_avg_4x4(d + y4_idx * dp + x4_idx, dp);
+ } else {
+ s_avg = aom_avg_4x4(s + y4_idx * sp + x4_idx, sp);
+ if (!is_key_frame) d_avg = aom_avg_4x4(d + y4_idx * dp + x4_idx, dp);
+ }
+#else
s_avg = aom_avg_4x4(s + y4_idx * sp + x4_idx, sp);
if (!is_key_frame) d_avg = aom_avg_4x4(d + y4_idx * dp + x4_idx, dp);
+#endif
+
sum = s_avg - d_avg;
sse = sum * sum;
}
@@ -708,8 +750,11 @@
force_split[split_index] = 0;
variance4x4downsample[i2 + j] = 0;
if (!is_key_frame) {
- fill_variance_8x8avg(s, sp, d, dp, x16_idx, y16_idx, vst, pixels_wide,
- pixels_high, is_key_frame);
+ fill_variance_8x8avg(s, sp, d, dp, x16_idx, y16_idx, vst,
+#if CONFIG_AV1_HIGHBITDEPTH
+ xd->cur_buf->flags,
+#endif
+ pixels_wide, pixels_high, is_key_frame);
fill_variance_tree(&vt->split[m].split[i].split[j], BLOCK_16X16);
get_variance(&vt->split[m].split[i].split[j].part_variances.none);
avg_16x16[m][i] +=
@@ -741,6 +786,9 @@
// compute the minmax over the 8x8 sub-blocks, and if above
// threshold, force split to 8x8 block for this 16x16 block.
int minmax = compute_minmax_8x8(s, sp, d, dp, x16_idx, y16_idx,
+#if CONFIG_AV1_HIGHBITDEPTH
+ xd->cur_buf->flags,
+#endif
pixels_wide, pixels_high);
int thresh_minmax = (int)cpi->vbp_threshold_minmax;
if (minmax > thresh_minmax) {
@@ -760,6 +808,9 @@
int y8_idx = y16_idx + ((k >> 1) << 3);
v8x8 *vst2 = is_key_frame ? &vst->split[k] : &vt2[i2 + j].split[k];
fill_variance_4x4avg(s, sp, d, dp, x8_idx, y8_idx, vst2,
+#if CONFIG_AV1_HIGHBITDEPTH
+ xd->cur_buf->flags,
+#endif
pixels_wide, pixels_high, is_key_frame);
}
}