Speed feature to binary search directional intra modes

This speed feature skips the search of the directional intra prediction
modes D63, D117, D27 and D153 unless the best intra mode found so far is
one of the diagonal, horizontal or vertical directions closest to the
mode being tested. In other words, it implements a sort of binary search
in the angular domain.

Speedup: about 9-10%
Results: -0.05% only on derfraw300.

Change-Id: I413584c41f2a3e8dabfbdeb40718c8fc4b1d63a2
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 49582b2..3265726 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -716,6 +716,7 @@
   sf->adjust_partitioning_from_last_frame = 0;
   sf->last_partitioning_redo_frequency = 4;
   sf->disable_splitmv = 0;
+  sf->conditional_oblique_intramodes = 0;
 
 #if CONFIG_MULTIPLE_ARF
   // Switch segmentation off.
@@ -751,6 +752,7 @@
                                      USE_LARGESTINTRA);
         sf->disable_splitmv =
             (MIN(cpi->common.width, cpi->common.height) >= 720)? 1 : 0;
+        sf->conditional_oblique_intramodes = 1;
       }
       if (speed == 2) {
         sf->adjust_thresholds_by_speed = 1;
@@ -766,6 +768,7 @@
                                       cpi->common.show_frame == 0) ?
                                      USE_FULL_RD :
                                      USE_LARGESTALL);
+        sf->conditional_oblique_intramodes = 1;
       }
       if (speed == 3) {
         sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES;
@@ -775,6 +778,8 @@
                                       cpi->common.show_frame == 0) ?
                                      USE_FULL_RD :
                                      USE_LARGESTALL);
+        sf->reduce_first_step_size = 1;
+        sf->conditional_oblique_intramodes = 1;
       }
       if (speed == 4) {
         sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES;
@@ -785,6 +790,7 @@
                                       cpi->common.show_frame == 0) ?
                                      USE_FULL_RD :
                                      USE_LARGESTALL);
+        sf->conditional_oblique_intramodes = 1;
       }
       /*
       if (speed == 2) {
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index bc1e54b..c947fe4 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -242,6 +242,10 @@
   int adjust_partitioning_from_last_frame;
   int last_partitioning_redo_frequency;
   int disable_splitmv;
+  // Search the D27, D63, D117 and D153 modes
+  // only if the best intra mode so far is one
+  // of the two directional modes nearest to each.
+  int conditional_oblique_intramodes;
 } SPEED_FEATURES;
 
 enum BlockSize {
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 38460a5..6251623 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1122,6 +1122,27 @@
     *psse = sse[mbmi->txfm_size];
 }
 
+// Decides whether the search of an oblique intra mode can be skipped.
+// Returns 1 if |mode| is one of D117, D63, D27 or D153 and
+// |best_intra_mode| is neither of the two directions the code pairs with
+// it; returns 0 in every other case (including all non-oblique modes).
+static int conditional_skip(MB_PREDICTION_MODE mode,
+                            MB_PREDICTION_MODE best_intra_mode) {
+  switch (mode) {
+    case D117_PRED:  // paired with V_PRED and D135_PRED
+      return best_intra_mode != V_PRED && best_intra_mode != D135_PRED;
+    case D63_PRED:  // paired with V_PRED and D45_PRED
+      return best_intra_mode != V_PRED && best_intra_mode != D45_PRED;
+    case D27_PRED:  // paired with H_PRED and D45_PRED
+      return best_intra_mode != H_PRED && best_intra_mode != D45_PRED;
+    case D153_PRED:  // paired with H_PRED and D135_PRED
+      return best_intra_mode != H_PRED && best_intra_mode != D135_PRED;
+    default:
+      // Modes outside the four oblique ones are never skipped here.
+      return 0;
+  }
+}
+
 static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
                                      MB_PREDICTION_MODE *best_mode,
                                      int *bmode_costs,
@@ -1159,6 +1180,12 @@
   for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
     int64_t this_rd;
     int ratey = 0;
+    // Only do the oblique modes if the best so far is
+    // one of the neighboring directional modes
+    if (cpi->sf.conditional_oblique_intramodes) {
+      if (conditional_skip(mode, *best_mode))
+          continue;
+    }
 
     rate = bmode_costs[mode];
     distortion = 0;
@@ -2855,6 +2882,8 @@
   unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
   vp9_prob comp_mode_p;
   int64_t best_overall_rd = INT64_MAX;
+  int64_t best_intra_rd = INT64_MAX;
+  MB_PREDICTION_MODE best_intra_mode = DC_PRED;
   INTERPOLATIONFILTERTYPE best_filter = SWITCHABLE;
   INTERPOLATIONFILTERTYPE tmp_best_filter = SWITCHABLE;
   int rate_uv_intra[TX_SIZE_MAX_SB], rate_uv_tokenonly[TX_SIZE_MAX_SB];
@@ -3130,6 +3159,12 @@
         txfm_cache[i] = txfm_cache[ONLY_4X4];
     } else if (ref_frame == INTRA_FRAME) {
       TX_SIZE uv_tx;
+      // Only search the oblique modes if the best so far is
+      // one of the neighboring directional modes
+      if (cpi->sf.conditional_oblique_intramodes) {
+        if (conditional_skip(mbmi->mode, best_intra_mode))
+            continue;
+      }
       super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL,
                       bsize, txfm_cache);
 
@@ -3359,14 +3394,13 @@
       this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
     }
 
-#if 0
     // Keep record of best intra distortion
-    if ((xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME) &&
-        (this_rd < best_intra_rd)) {
+    if (xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME &&
+        xd->mode_info_context->mbmi.mode <= TM_PRED &&
+        this_rd < best_intra_rd) {
       best_intra_rd = this_rd;
-      *returnintra = distortion2;
+      best_intra_mode = xd->mode_info_context->mbmi.mode;
     }
-#endif
 
     if (!disable_skip && mbmi->ref_frame[0] == INTRA_FRAME)
       for (i = 0; i < NB_PREDICTION_TYPES; ++i)