add a new interp filter search strategy.

This commit addes a new strategy to reduce the search for optimal
interpolation filter type. The encoder counts and store how many each
filter type is selected and used for each of the reference frames.
A filter type that is rarely used for all three reference frames is
masked out to avoid computation.

The impact on compression is neglectible:
-0.02% on derf
+0.02% on stdhd

Encoding time is seen to reduce by 2~3%.

Change-Id: Ibafa92291b51185de40da513716222db4b230383
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index bdb1338..9fcb33c 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -294,6 +294,7 @@
       vp9_write_token(w, vp9_switchable_interp_tree,
                       cm->fc.switchable_interp_prob[ctx],
                       &switchable_interp_encodings[mbmi->interp_filter]);
+      ++cpi->interp_filter_selected[0][mbmi->interp_filter];
     } else {
       assert(mbmi->interp_filter == cm->interp_filter);
     }
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 0626f44..9f12d66 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -131,8 +131,10 @@
     if (!is_spatial_svc(cpi))
       cpi->refresh_golden_frame = 1;
     cpi->refresh_alt_ref_frame = 1;
+    vp9_zero(cpi->interp_filter_selected);
   } else {
     cm->fc = cm->frame_contexts[cm->frame_context_idx];
+    vp9_zero(cpi->interp_filter_selected[0]);
   }
 }
 
@@ -1556,17 +1558,32 @@
 
       ref_cnt_fb(cm->frame_bufs,
                  &cm->ref_frame_map[arf_idx], cm->new_fb_idx);
+      vpx_memcpy(cpi->interp_filter_selected[ALTREF_FRAME],
+                 cpi->interp_filter_selected[0],
+                 sizeof(cpi->interp_filter_selected[0]));
     }
 
     if (cpi->refresh_golden_frame) {
       ref_cnt_fb(cm->frame_bufs,
                  &cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx);
+      if (!cpi->rc.is_src_frame_alt_ref)
+        vpx_memcpy(cpi->interp_filter_selected[GOLDEN_FRAME],
+                   cpi->interp_filter_selected[0],
+                   sizeof(cpi->interp_filter_selected[0]));
+      else
+        vpx_memcpy(cpi->interp_filter_selected[GOLDEN_FRAME],
+                   cpi->interp_filter_selected[ALTREF_FRAME],
+                   sizeof(cpi->interp_filter_selected[ALTREF_FRAME]));
     }
   }
 
   if (cpi->refresh_last_frame) {
     ref_cnt_fb(cm->frame_bufs,
                &cm->ref_frame_map[cpi->lst_fb_idx], cm->new_fb_idx);
+    if (!cpi->rc.is_src_frame_alt_ref)
+      vpx_memcpy(cpi->interp_filter_selected[LAST_FRAME],
+                 cpi->interp_filter_selected[0],
+                 sizeof(cpi->interp_filter_selected[0]));
   }
 #if CONFIG_VP9_TEMPORAL_DENOISING
   if (cpi->oxcf.noise_sensitivity > 0) {
@@ -2057,6 +2074,33 @@
   }
 }
 
+
+int setup_interp_filter_search_mask(VP9_COMP *cpi) {
+  INTERP_FILTER ifilter;
+  int ref_total[MAX_REF_FRAMES] = {0};
+  MV_REFERENCE_FRAME ref;
+  int mask = 0;
+  if (cpi->common.last_frame_type == KEY_FRAME ||
+      cpi->refresh_alt_ref_frame)
+    return mask;
+  for (ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref)
+    for (ifilter = EIGHTTAP; ifilter <= EIGHTTAP_SHARP; ++ifilter)
+      ref_total[ref] += cpi->interp_filter_selected[ref][ifilter];
+
+  for (ifilter = EIGHTTAP; ifilter <= EIGHTTAP_SHARP; ++ifilter) {
+    if ((ref_total[LAST_FRAME] &&
+        cpi->interp_filter_selected[LAST_FRAME][ifilter] == 0) &&
+        (ref_total[GOLDEN_FRAME] == 0 ||
+         cpi->interp_filter_selected[GOLDEN_FRAME][ifilter] * 50
+           < ref_total[GOLDEN_FRAME]) &&
+        (ref_total[ALTREF_FRAME] == 0 ||
+         cpi->interp_filter_selected[ALTREF_FRAME][ifilter] * 50
+           < ref_total[ALTREF_FRAME]))
+      mask |= 1 << ifilter;
+  }
+  return mask;
+}
+
 static void encode_frame_to_data_rate(VP9_COMP *cpi,
                                       size_t *size,
                                       uint8_t *dest,
@@ -2095,6 +2139,12 @@
 
   set_mv_search_params(cpi);
 
+  if (cpi->oxcf.pass == 2 &&
+      cpi->sf.adaptive_interp_filter_search)
+    cpi->sf.interp_filter_search_mask =
+        setup_interp_filter_search_mask(cpi);
+
+
   // Set various flags etc to special state if it is a key frame.
   if (frame_is_intra_only(cm)) {
     // Reset the loop filter deltas and segmentation map.
@@ -2813,6 +2863,7 @@
         }
       }
 
+
       if (cpi->b_calculate_ssimg) {
         double y, u, v, frame_all;
         frame_all = vp9_calc_ssimg(cpi->Source, cm->frame_to_show, &y, &u, &v);
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 9739244..71fc4de 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -290,6 +290,7 @@
   double framerate;
 
   vp9_coeff_count coef_counts[TX_SIZES][PLANE_TYPES];
+  int interp_filter_selected[MAX_REF_FRAMES][SWITCHABLE];
 
   struct vpx_codec_pkt_list  *output_pkt_list;
 
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index cfda964..063eb4b 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -2263,6 +2263,13 @@
         } else {
           int rate_sum = 0;
           int64_t dist_sum = 0;
+          if (i > 0 && cpi->sf.adaptive_interp_filter_search &&
+              (cpi->sf.interp_filter_search_mask & (1 << i))) {
+            rate_sum = INT_MAX;
+            dist_sum = INT64_MAX;
+            continue;
+          }
+
           if ((cm->interp_filter == SWITCHABLE &&
                (!i || best_needs_copy)) ||
               (cm->interp_filter != SWITCHABLE &&
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index 57835ec..a9d9400 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -143,6 +143,7 @@
     sf->mode_skip_start = 6;
     sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
     sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC;
+    sf->adaptive_interp_filter_search = 1;
   }
 
   if (speed >= 4) {
@@ -380,6 +381,8 @@
   sf->force_frame_boost = 0;
   sf->max_delta_qindex = 0;
   sf->disable_filter_search_var_thresh = 0;
+  sf->adaptive_interp_filter_search = 0;
+
   for (i = 0; i < TX_SIZES; i++) {
     sf->intra_y_mode_mask[i] = INTRA_ALL;
     sf->intra_uv_mode_mask[i] = INTRA_ALL;
diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h
index bad956d..e2e5c1e 100644
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -103,6 +103,12 @@
 } MODE_SEARCH_SKIP_LOGIC;
 
 typedef enum {
+  FLAG_SKIP_EIGHTTAP = 1 << EIGHTTAP,
+  FLAG_SKIP_EIGHTTAP_SMOOTH = 1 << EIGHTTAP_SMOOTH,
+  FLAG_SKIP_EIGHTTAP_SHARP = 1 << EIGHTTAP_SHARP,
+} INTERP_FILTER_MASK;
+
+typedef enum {
   // Search partitions using RD/NONRD criterion
   SEARCH_PARTITION = 0,
 
@@ -380,6 +386,12 @@
   // Early termination in transform size search, which only applies while
   // tx_size_search_method is USE_FULL_RD.
   int tx_size_search_breakout;
+
+  // adaptive interp_filter search to allow skip of certain filter types.
+  int adaptive_interp_filter_search;
+
+  // mask for skip evaluation of certain interp_filter type.
+  INTERP_FILTER_MASK interp_filter_search_mask;
 } SPEED_FEATURES;
 
 struct VP9_COMP;