Add highbd avg_4x4, avg_8x8, minmax_8x8

Used in var based partition.

BUG=aomedia:2520

Change-Id: I6b90cf78d5585050600349912ff7293d8da7db40
diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl
index eedb925..9867602 100755
--- a/aom_dsp/aom_dsp_rtcd_defs.pl
+++ b/aom_dsp/aom_dsp_rtcd_defs.pl
@@ -883,6 +883,12 @@
   add_proto qw/void aom_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
   specialize qw/aom_minmax_8x8 sse2/;
 
+  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
+    add_proto qw/unsigned int aom_highbd_avg_8x8/, "const uint8_t *, int p";
+    add_proto qw/unsigned int aom_highbd_avg_4x4/, "const uint8_t *, int p";
+    add_proto qw/void aom_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
+  }
+
   add_proto qw/void aom_int_pro_row/, "int16_t *hbuf, const uint8_t *ref, const int ref_stride, const int height";
   specialize qw/aom_int_pro_row sse2/;
 
diff --git a/aom_dsp/avg.c b/aom_dsp/avg.c
index 6cb92d5..5e8c5a3 100644
--- a/aom_dsp/avg.c
+++ b/aom_dsp/avg.c
@@ -48,6 +48,46 @@
   return (sum + 32) >> 6;
 }
 
+#if CONFIG_AV1_HIGHBITDEPTH
+unsigned int aom_highbd_avg_8x8_c(const uint8_t *s8, int p) {
+  int i, j;
+  int sum = 0;
+  const uint16_t *s = CONVERT_TO_SHORTPTR(s8);
+  for (i = 0; i < 8; ++i, s += p)
+    for (j = 0; j < 8; sum += s[j], ++j) {
+    }
+
+  return (sum + 32) >> 6;
+}
+
+unsigned int aom_highbd_avg_4x4_c(const uint8_t *s8, int p) {
+  int i, j;
+  int sum = 0;
+  const uint16_t *s = CONVERT_TO_SHORTPTR(s8);
+  for (i = 0; i < 4; ++i, s += p)
+    for (j = 0; j < 4; sum += s[j], ++j) {
+    }
+
+  return (sum + 8) >> 4;
+}
+
+void aom_highbd_minmax_8x8_c(const uint8_t *s8, int p, const uint8_t *d8,
+                             int dp, int *min, int *max) {
+  int i, j;
+  const uint16_t *s = CONVERT_TO_SHORTPTR(s8);
+  const uint16_t *d = CONVERT_TO_SHORTPTR(d8);
+  *min = 255;
+  *max = 0;
+  for (i = 0; i < 8; ++i, s += p, d += dp) {
+    for (j = 0; j < 8; ++j) {
+      int diff = abs(s[j] - d[j]);
+      *min = diff < *min ? diff : *min;
+      *max = diff > *max ? diff : *max;
+    }
+  }
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
 // src_diff: first pass, 9 bit, dynamic range [-255, 255]
 //           second pass, 12 bit, dynamic range [-2040, 2040]
 static void hadamard_col8(const int16_t *src_diff, ptrdiff_t src_stride,
diff --git a/av1/encoder/var_based_part.c b/av1/encoder/var_based_part.c
index bff15eb..4f1bd9c 100644
--- a/av1/encoder/var_based_part.c
+++ b/av1/encoder/var_based_part.c
@@ -262,8 +262,12 @@
 static AOM_INLINE void fill_variance_8x8avg(const uint8_t *s, int sp,
                                             const uint8_t *d, int dp,
                                             int x16_idx, int y16_idx,
-                                            v16x16 *vst, int pixels_wide,
-                                            int pixels_high, int is_key_frame) {
+                                            v16x16 *vst,
+#if CONFIG_AV1_HIGHBITDEPTH
+                                            int highbd_flag,
+#endif
+                                            int pixels_wide, int pixels_high,
+                                            int is_key_frame) {
   int k;
   for (k = 0; k < 4; k++) {
     int x8_idx = x16_idx + ((k & 1) << 3);
@@ -273,9 +277,19 @@
     if (x8_idx < pixels_wide && y8_idx < pixels_high) {
       int s_avg;
       int d_avg = 128;
+#if CONFIG_AV1_HIGHBITDEPTH
+      if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
+        s_avg = aom_highbd_avg_8x8(s + y8_idx * sp + x8_idx, sp);
+        if (!is_key_frame)
+          d_avg = aom_highbd_avg_8x8(d + y8_idx * dp + x8_idx, dp);
+      } else {
+        s_avg = aom_avg_8x8(s + y8_idx * sp + x8_idx, sp);
+        if (!is_key_frame) d_avg = aom_avg_8x8(d + y8_idx * dp + x8_idx, dp);
+      }
+#else
       s_avg = aom_avg_8x8(s + y8_idx * sp + x8_idx, sp);
       if (!is_key_frame) d_avg = aom_avg_8x8(d + y8_idx * dp + x8_idx, dp);
-
+#endif
       sum = s_avg - d_avg;
       sse = sum * sum;
     }
@@ -284,8 +298,11 @@
 }
 
 static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d,
-                              int dp, int x16_idx, int y16_idx, int pixels_wide,
-                              int pixels_high) {
+                              int dp, int x16_idx, int y16_idx,
+#if CONFIG_AV1_HIGHBITDEPTH
+                              int highbd_flag,
+#endif
+                              int pixels_wide, int pixels_high) {
   int k;
   int minmax_max = 0;
   int minmax_min = 255;
@@ -296,8 +313,18 @@
     int min = 0;
     int max = 0;
     if (x8_idx < pixels_wide && y8_idx < pixels_high) {
+#if CONFIG_AV1_HIGHBITDEPTH
+      if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
+        aom_highbd_minmax_8x8(s + y8_idx * sp + x8_idx, sp,
+                              d + y8_idx * dp + x8_idx, dp, &min, &max);
+      } else {
+        aom_minmax_8x8(s + y8_idx * sp + x8_idx, sp, d + y8_idx * dp + x8_idx,
+                       dp, &min, &max);
+      }
+#else
       aom_minmax_8x8(s + y8_idx * sp + x8_idx, sp, d + y8_idx * dp + x8_idx, dp,
                      &min, &max);
+#endif
       if ((max - min) > minmax_max) minmax_max = (max - min);
       if ((max - min) < minmax_min) minmax_min = (max - min);
     }
@@ -308,6 +335,9 @@
 static AOM_INLINE void fill_variance_4x4avg(const uint8_t *s, int sp,
                                             const uint8_t *d, int dp,
                                             int x8_idx, int y8_idx, v8x8 *vst,
+#if CONFIG_AV1_HIGHBITDEPTH
+                                            int highbd_flag,
+#endif
                                             int pixels_wide, int pixels_high,
                                             int is_key_frame) {
   int k;
@@ -319,8 +349,20 @@
     if (x4_idx < pixels_wide && y4_idx < pixels_high) {
       int s_avg;
       int d_avg = 128;
+#if CONFIG_AV1_HIGHBITDEPTH
+      if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
+        s_avg = aom_highbd_avg_4x4(s + y4_idx * sp + x4_idx, sp);
+        if (!is_key_frame)
+          d_avg = aom_highbd_avg_4x4(d + y4_idx * dp + x4_idx, dp);
+      } else {
+        s_avg = aom_avg_4x4(s + y4_idx * sp + x4_idx, sp);
+        if (!is_key_frame) d_avg = aom_avg_4x4(d + y4_idx * dp + x4_idx, dp);
+      }
+#else
       s_avg = aom_avg_4x4(s + y4_idx * sp + x4_idx, sp);
       if (!is_key_frame) d_avg = aom_avg_4x4(d + y4_idx * dp + x4_idx, dp);
+#endif
+
       sum = s_avg - d_avg;
       sse = sum * sum;
     }
@@ -708,8 +750,11 @@
         force_split[split_index] = 0;
         variance4x4downsample[i2 + j] = 0;
         if (!is_key_frame) {
-          fill_variance_8x8avg(s, sp, d, dp, x16_idx, y16_idx, vst, pixels_wide,
-                               pixels_high, is_key_frame);
+          fill_variance_8x8avg(s, sp, d, dp, x16_idx, y16_idx, vst,
+#if CONFIG_AV1_HIGHBITDEPTH
+                               xd->cur_buf->flags,
+#endif
+                               pixels_wide, pixels_high, is_key_frame);
           fill_variance_tree(&vt->split[m].split[i].split[j], BLOCK_16X16);
           get_variance(&vt->split[m].split[i].split[j].part_variances.none);
           avg_16x16[m][i] +=
@@ -741,6 +786,9 @@
             // compute the minmax over the 8x8 sub-blocks, and if above
             // threshold, force split to 8x8 block for this 16x16 block.
             int minmax = compute_minmax_8x8(s, sp, d, dp, x16_idx, y16_idx,
+#if CONFIG_AV1_HIGHBITDEPTH
+                                            xd->cur_buf->flags,
+#endif
                                             pixels_wide, pixels_high);
             int thresh_minmax = (int)cpi->vbp_threshold_minmax;
             if (minmax > thresh_minmax) {
@@ -760,6 +808,9 @@
             int y8_idx = y16_idx + ((k >> 1) << 3);
             v8x8 *vst2 = is_key_frame ? &vst->split[k] : &vt2[i2 + j].split[k];
             fill_variance_4x4avg(s, sp, d, dp, x8_idx, y8_idx, vst2,
+#if CONFIG_AV1_HIGHBITDEPTH
+                                 xd->cur_buf->flags,
+#endif
                                  pixels_wide, pixels_high, is_key_frame);
           }
         }