Add speed feature for compound reference.

Off by default for speed >= 7

Change-Id: I7f6d383f4d3502de4d8c55e53328195e03f2546b
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index 259e3c0..437624e 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -529,6 +529,102 @@
   }
 }
 
+static void estimate_comp_ref_frame_costs(
+    const AV1_COMMON *cm, const MACROBLOCKD *xd, const MACROBLOCK *x,
+    int segment_id, unsigned int (*ref_costs_comp)[REF_FRAMES]) {
+  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
+    for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame)
+      memset(ref_costs_comp[ref_frame], 0,
+             REF_FRAMES * sizeof((*ref_costs_comp)[0]));
+  } else {
+    int intra_inter_ctx = av1_get_intra_inter_context(xd);
+    unsigned int base_cost = x->intra_inter_cost[intra_inter_ctx][1];
+
+    if (cm->current_frame.reference_mode != SINGLE_REFERENCE) {
+      // Similar to single ref, determine cost of compound ref frames.
+      // cost_compound_refs = cost_first_ref + cost_second_ref
+      const int bwdref_comp_ctx_p = av1_get_pred_context_comp_bwdref_p(xd);
+      const int bwdref_comp_ctx_p1 = av1_get_pred_context_comp_bwdref_p1(xd);
+      const int ref_comp_ctx_p = av1_get_pred_context_comp_ref_p(xd);
+      const int ref_comp_ctx_p1 = av1_get_pred_context_comp_ref_p1(xd);
+      const int ref_comp_ctx_p2 = av1_get_pred_context_comp_ref_p2(xd);
+
+      const int comp_ref_type_ctx = av1_get_comp_reference_type_context(xd);
+      unsigned int ref_bicomp_costs[REF_FRAMES] = { 0 };
+
+      ref_bicomp_costs[LAST_FRAME] = ref_bicomp_costs[LAST2_FRAME] =
+          ref_bicomp_costs[LAST3_FRAME] = ref_bicomp_costs[GOLDEN_FRAME] =
+              base_cost + x->comp_ref_type_cost[comp_ref_type_ctx][1];
+      ref_bicomp_costs[BWDREF_FRAME] = ref_bicomp_costs[ALTREF2_FRAME] = 0;
+      ref_bicomp_costs[ALTREF_FRAME] = 0;
+
+      // cost of first ref frame
+      ref_bicomp_costs[LAST_FRAME] += x->comp_ref_cost[ref_comp_ctx_p][0][0];
+      ref_bicomp_costs[LAST2_FRAME] += x->comp_ref_cost[ref_comp_ctx_p][0][0];
+      ref_bicomp_costs[LAST3_FRAME] += x->comp_ref_cost[ref_comp_ctx_p][0][1];
+      ref_bicomp_costs[GOLDEN_FRAME] += x->comp_ref_cost[ref_comp_ctx_p][0][1];
+
+      ref_bicomp_costs[LAST_FRAME] += x->comp_ref_cost[ref_comp_ctx_p1][1][0];
+      ref_bicomp_costs[LAST2_FRAME] += x->comp_ref_cost[ref_comp_ctx_p1][1][1];
+
+      ref_bicomp_costs[LAST3_FRAME] += x->comp_ref_cost[ref_comp_ctx_p2][2][0];
+      ref_bicomp_costs[GOLDEN_FRAME] += x->comp_ref_cost[ref_comp_ctx_p2][2][1];
+
+      // cost of second ref frame
+      ref_bicomp_costs[BWDREF_FRAME] +=
+          x->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
+      ref_bicomp_costs[ALTREF2_FRAME] +=
+          x->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
+      ref_bicomp_costs[ALTREF_FRAME] +=
+          x->comp_bwdref_cost[bwdref_comp_ctx_p][0][1];
+
+      ref_bicomp_costs[BWDREF_FRAME] +=
+          x->comp_bwdref_cost[bwdref_comp_ctx_p1][1][0];
+      ref_bicomp_costs[ALTREF2_FRAME] +=
+          x->comp_bwdref_cost[bwdref_comp_ctx_p1][1][1];
+
+      // cost: if one ref frame is forward ref, the other ref is backward ref
+      for (int ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
+        for (int ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1) {
+          ref_costs_comp[ref0][ref1] =
+              ref_bicomp_costs[ref0] + ref_bicomp_costs[ref1];
+        }
+      }
+
+      // cost: if both ref frames are the same side.
+      const int uni_comp_ref_ctx_p = av1_get_pred_context_uni_comp_ref_p(xd);
+      const int uni_comp_ref_ctx_p1 = av1_get_pred_context_uni_comp_ref_p1(xd);
+      const int uni_comp_ref_ctx_p2 = av1_get_pred_context_uni_comp_ref_p2(xd);
+      ref_costs_comp[LAST_FRAME][LAST2_FRAME] =
+          base_cost + x->comp_ref_type_cost[comp_ref_type_ctx][0] +
+          x->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
+          x->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][0];
+      ref_costs_comp[LAST_FRAME][LAST3_FRAME] =
+          base_cost + x->comp_ref_type_cost[comp_ref_type_ctx][0] +
+          x->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
+          x->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
+          x->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][0];
+      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] =
+          base_cost + x->comp_ref_type_cost[comp_ref_type_ctx][0] +
+          x->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
+          x->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
+          x->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][1];
+      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] =
+          base_cost + x->comp_ref_type_cost[comp_ref_type_ctx][0] +
+          x->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][1];
+    } else {
+      for (int ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
+        for (int ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1)
+          ref_costs_comp[ref0][ref1] = 512;
+      }
+      ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 512;
+      ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 512;
+      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = 512;
+      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = 512;
+    }
+  }
+}
+
 static void model_rd_from_sse(const AV1_COMP *const cpi,
                               const MACROBLOCK *const x, BLOCK_SIZE plane_bsize,
                               int plane, int64_t sse, int num_samples,
@@ -882,7 +978,8 @@
   int ref_frame_skip_mask = 0;
   int best_pred_sad = INT_MAX;
   int best_early_term = 0;
-  unsigned int ref_costs_single[REF_FRAMES];
+  unsigned int ref_costs_single[REF_FRAMES],
+      ref_costs_comp[REF_FRAMES][REF_FRAMES];
   int use_golden_nonzeromv = 1;
   int force_skip_low_temp_var = 0;
   int skip_ref_find_pred[5] = { 0 };
@@ -928,6 +1025,8 @@
   av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);
 
   estimate_single_ref_frame_costs(cm, xd, x, segment_id, ref_costs_single);
+  if (cpi->sf.use_comp_ref_nonrd)
+    estimate_comp_ref_frame_costs(cm, xd, x, segment_id, ref_costs_comp);
 
   memset(&mode_checked[0][0], 0, MB_MODE_COUNT * REF_FRAMES);
   if (reuse_inter_pred) {
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index f52ac00..a765afc 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -466,6 +466,7 @@
   sf->use_fast_nonrd_pick_mode = 0;
   sf->reuse_inter_pred_nonrd = 0;
   sf->estimate_motion_for_var_based_partition = 1;
+  sf->use_comp_ref_nonrd = 1;
 
   if (speed >= 1) {
     sf->gm_erroradv_type = GM_ERRORADV_TR_1;
@@ -591,11 +592,10 @@
   }
 
   if (speed >= 6) {
-    int i;
     sf->optimize_coefficients = NO_TRELLIS_OPT;
     sf->mv.search_method = HEX;
     sf->disable_filter_search_var_thresh = 500;
-    for (i = 0; i < TX_SIZES; ++i) {
+    for (int i = 0; i < TX_SIZES; ++i) {
       sf->intra_y_mode_mask[i] = INTRA_DC;
       sf->intra_uv_mode_mask[i] = UV_INTRA_DC_CFL;
     }
@@ -617,6 +617,7 @@
     // and disabled TX64
     if (!cpi->oxcf.enable_tx64) sf->tx_size_search_method = USE_FAST_RD;
     sf->use_nonrd_pick_mode = 1;
+    sf->use_comp_ref_nonrd = 0;
     sf->inter_mode_rd_model_estimation = 2;
   }
   if (speed >= 8) {
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 49df338..b767b8c 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -712,6 +712,9 @@
   // and only perform a full MV search on the motion vectors that performed
   // well.
   int prune_mode_search_simple_translation;
+
+  // Use compound reference for non-RD mode.
+  int use_comp_ref_nonrd;
 } SPEED_FEATURES;
 
 struct AV1_COMP;