Reuse previous best prediction mode in partition AB search
This commit adds a speed feature to reuse the previous best
prediction mode found in PARTITION_SPLIT and PARTITION_RECT for
PARTITION_AB search. This speed feature is turned on for speed 3 and
above.
Performance:
SPEED_SET | AVG_PSNR | OVR_PSNR | SSIM | SPD
3 | +0.019% | +0.023% | +0.044% | +1.4%
4 | +0.012% | +0.015% | +0.005% | +0.6%
5 | +0.011% | +0.018% | -0.029% | +0.2%
STATS_CHANGED
Change-Id: I788fdbde2d420dc2c9e89dc861d17489ac46bbe7
diff --git a/av1/common/enums.h b/av1/common/enums.h
index 77d2b6a..9c2976b 100644
--- a/av1/common/enums.h
+++ b/av1/common/enums.h
@@ -412,6 +412,7 @@
GLOBAL_GLOBALMV,
NEW_NEWMV,
MB_MODE_COUNT,
+ PRED_MODE_INVALID = MB_MODE_COUNT,
INTRA_MODE_START = DC_PRED,
INTRA_MODE_END = NEARESTMV,
DIR_MODE_START = V_PRED,
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index dc8bb0f..f79c911 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -944,7 +944,7 @@
/**@}*/
/*****************************************************************************
- * \name Reference Frame Searc
+ * \name Reference Frame Search
****************************************************************************/
/**@{*/
/*! \brief Sum absolute distortion of the predicted mv for each ref frame.
@@ -1080,6 +1080,11 @@
* Contains the hash table, hash function, and buffer used for intrabc.
*/
IntraBCHashInfo intrabc_hash_info;
+
+ /*! \brief Whether to reuse the mode stored in intermode_cache. */
+ int use_intermode_cache;
+ /*! \brief The mode to reuse during \ref av1_rd_pick_inter_mode_sb. */
+ PREDICTION_MODE intermode_cache;
/**@}*/
/*****************************************************************************
diff --git a/av1/encoder/context_tree.c b/av1/encoder/context_tree.c
index 6d07ef2..6554be6 100644
--- a/av1/encoder/context_tree.c
+++ b/av1/encoder/context_tree.c
@@ -11,6 +11,7 @@
#include "av1/encoder/context_tree.h"
#include "av1/encoder/encoder.h"
+#include "av1/encoder/rd.h"
static const BLOCK_SIZE square[MAX_SB_SIZE_LOG2 - 1] = {
BLOCK_4X4, BLOCK_8X8, BLOCK_16X16, BLOCK_32X32, BLOCK_64X64, BLOCK_128X128,
@@ -101,6 +102,8 @@
}
}
+ av1_invalid_rd_stats(&ctx->rd_stats);
+
return ctx;
}
diff --git a/av1/encoder/partition_search.c b/av1/encoder/partition_search.c
index 5d5af2e..54471d7 100644
--- a/av1/encoder/partition_search.c
+++ b/av1/encoder/partition_search.c
@@ -2249,8 +2249,9 @@
int mi_row, int mi_col, BLOCK_SIZE bsize,
PARTITION_TYPE partition,
const BLOCK_SIZE ab_subsize[SUB_PARTITIONS_AB],
- const int ab_mi_pos[SUB_PARTITIONS_AB][2]) {
- const MACROBLOCK *const x = &td->mb;
+ const int ab_mi_pos[SUB_PARTITIONS_AB][2],
+ const PREDICTION_MODE *mode_cache) {
+ MACROBLOCK *const x = &td->mb;
const MACROBLOCKD *const xd = &x->e_mbd;
const int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
RD_STATS sum_rdc;
@@ -2259,10 +2260,19 @@
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
// Loop over sub-partitions in AB partition type.
for (int i = 0; i < SUB_PARTITIONS_AB; i++) {
- if (!rd_try_subblock(cpi, td, tile_data, tp, i == SUB_PARTITIONS_AB - 1,
- ab_mi_pos[i][0], ab_mi_pos[i][1], ab_subsize[i],
- *best_rdc, &sum_rdc, partition, ctxs[i]))
+ if (mode_cache && mode_cache[i] != PRED_MODE_INVALID) {
+ x->use_intermode_cache = 1;
+ x->intermode_cache = mode_cache[i];
+ }
+ const int mode_search_success =
+ rd_try_subblock(cpi, td, tile_data, tp, i == SUB_PARTITIONS_AB - 1,
+ ab_mi_pos[i][0], ab_mi_pos[i][1], ab_subsize[i],
+ *best_rdc, &sum_rdc, partition, ctxs[i]);
+ x->use_intermode_cache = 0;
+ x->intermode_cache = PRED_MODE_INVALID;
+ if (!mode_search_success) {
return false;
+ }
}
av1_rd_cost_update(x->rdmult, &sum_rdc);
@@ -2616,7 +2626,8 @@
PC_TREE *pc_tree, PICK_MODE_CONTEXT *dst_ctxs[SUB_PARTITIONS_AB],
PartitionSearchState *part_search_state, RD_STATS *best_rdc,
const BLOCK_SIZE ab_subsize[SUB_PARTITIONS_AB],
- const int ab_mi_pos[SUB_PARTITIONS_AB][2], const PARTITION_TYPE part_type) {
+ const int ab_mi_pos[SUB_PARTITIONS_AB][2], const PARTITION_TYPE part_type,
+ const PREDICTION_MODE *mode_cache) {
const AV1_COMMON *const cm = &cpi->common;
PartitionBlkParams blk_params = part_search_state->part_blk_params;
const int mi_row = blk_params.mi_row;
@@ -2641,7 +2652,7 @@
// Test this partition and update the best partition.
part_search_state->found_best_partition |= rd_test_partition3(
cpi, td, tile_data, tp, pc_tree, best_rdc, dst_ctxs, mi_row, mi_col,
- bsize, part_type, ab_subsize, ab_mi_pos);
+ bsize, part_type, ab_subsize, ab_mi_pos, mode_cache);
#if CONFIG_COLLECT_PARTITION_STATS
if (partition_timer_on) {
@@ -2683,6 +2694,56 @@
mode_srch_ctx[HORZ_A][1] = &pc_tree->split[1]->none;
}
+// Stores in *dst_mode the prediction mode recorded in `ctx`. When `ctx` is
+// NULL, or its rd_stats.rate is not below INT_MAX, PRED_MODE_INVALID is
+// stored instead.
+static AOM_INLINE void copy_partition_mode_from_mode_context(
+    PREDICTION_MODE *dst_mode, const PICK_MODE_CONTEXT *ctx) {
+  // Guard first: nothing usable to copy from this context.
+  if (!ctx || ctx->rd_stats.rate >= INT_MAX) {
+    *dst_mode = PRED_MODE_INVALID;
+    return;
+  }
+  *dst_mode = ctx->mic.mode;
+}
+
+// Stores in *dst_mode the prediction mode taken from the `none` context of
+// `pc_tree` (via copy_partition_mode_from_mode_context). A NULL `pc_tree`
+// produces PRED_MODE_INVALID.
+static AOM_INLINE void copy_partition_mode_from_pc_tree(
+    PREDICTION_MODE *dst_mode, const PC_TREE *pc_tree) {
+  if (!pc_tree) {
+    *dst_mode = PRED_MODE_INVALID;
+    return;
+  }
+  copy_partition_mode_from_mode_context(dst_mode, pc_tree->none);
+}
+
+// Fills the three entries of `mode_cache` for the given AB partition type
+// from contexts already present in `pc_tree`:
+//   HORZ_A: split[0], split[1], horizontal[1]
+//   HORZ_B: horizontal[0], split[2], split[3]
+//   VERT_A: split[0], split[2], vertical[1]
+//   VERT_B: vertical[0], split[1], split[3]
+// Entries whose source is missing or has no valid stats are set to
+// PRED_MODE_INVALID by the copy helpers. Any other `ab_part_type` trips the
+// assert and leaves `mode_cache` untouched.
+static AOM_INLINE void set_mode_cache_for_partition_ab(
+    PREDICTION_MODE *mode_cache, const PC_TREE *pc_tree,
+    AB_PART_TYPE ab_part_type) {
+  PREDICTION_MODE *const slot0 = &mode_cache[0];
+  PREDICTION_MODE *const slot1 = &mode_cache[1];
+  PREDICTION_MODE *const slot2 = &mode_cache[2];
+
+  if (ab_part_type == HORZ_A) {
+    copy_partition_mode_from_pc_tree(slot0, pc_tree->split[0]);
+    copy_partition_mode_from_pc_tree(slot1, pc_tree->split[1]);
+    copy_partition_mode_from_mode_context(slot2, pc_tree->horizontal[1]);
+  } else if (ab_part_type == HORZ_B) {
+    copy_partition_mode_from_mode_context(slot0, pc_tree->horizontal[0]);
+    copy_partition_mode_from_pc_tree(slot1, pc_tree->split[2]);
+    copy_partition_mode_from_pc_tree(slot2, pc_tree->split[3]);
+  } else if (ab_part_type == VERT_A) {
+    copy_partition_mode_from_pc_tree(slot0, pc_tree->split[0]);
+    copy_partition_mode_from_pc_tree(slot1, pc_tree->split[2]);
+    copy_partition_mode_from_mode_context(slot2, pc_tree->vertical[1]);
+  } else if (ab_part_type == VERT_B) {
+    copy_partition_mode_from_mode_context(slot0, pc_tree->vertical[0]);
+    copy_partition_mode_from_pc_tree(slot1, pc_tree->split[1]);
+    copy_partition_mode_from_pc_tree(slot2, pc_tree->split[3]);
+  } else {
+    assert(0 && "Invalid ab partition type!\n");
+  }
+}
+
// AB Partitions type search.
static void ab_partitions_search(
AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data,
@@ -2775,7 +2836,8 @@
cur_part_ctxs[ab_part_type][i]->rd_mode_is_ready = 0;
}
- // Copy of mode search results if the ctx is ready.
+ // We can directly copy the mode search results if we have already searched
+ // the current block and the contexts match.
if (is_ctx_ready[ab_part_type][0]) {
av1_copy_tree_context(cur_part_ctxs[ab_part_type][0],
mode_srch_ctx[ab_part_type][0][0]);
@@ -2789,11 +2851,19 @@
}
}
+ // Even if the contexts don't match, we can still speed up by reusing the
+ // previous prediction mode.
+ PREDICTION_MODE mode_cache[3] = { PRED_MODE_INVALID, PRED_MODE_INVALID,
+ PRED_MODE_INVALID };
+ if (cpi->sf.inter_sf.reuse_best_prediction_for_part_ab) {
+ set_mode_cache_for_partition_ab(mode_cache, pc_tree, ab_part_type);
+ }
+
// Evaluation of AB partition type.
rd_pick_ab_part(cpi, td, tile_data, tp, x, x_ctx, pc_tree,
cur_part_ctxs[ab_part_type], part_search_state, best_rdc,
ab_subsize[ab_part_type], ab_mi_pos[ab_part_type],
- part_type);
+ part_type, mode_cache);
}
}
@@ -3169,6 +3239,7 @@
// PARTITION_NONE evaluation and cost update.
pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, this_rdc, PARTITION_NONE,
bsize, pc_tree->none, best_remain_rdcost);
+
av1_rd_cost_update(x->rdmult, this_rdc);
#if CONFIG_COLLECT_PARTITION_STATS
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 0a4bce7..8edf0bd 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -5095,6 +5095,10 @@
num_single_modes_processed += is_single_pred;
set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
+ if (x->use_intermode_cache && this_mode != x->intermode_cache) {
+ continue;
+ }
+
// Apply speed features to decide if this inter mode can be skipped
if (skip_inter_mode(cpi, x, bsize, ref_frame_rd, midx, &sf_args)) continue;
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 8d58cac..b34f5cb 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -536,6 +536,7 @@
sf->inter_sf.selective_ref_frame = 4;
sf->inter_sf.skip_repeated_ref_mv = 1;
sf->inter_sf.skip_repeated_full_newmv = 1;
+ sf->inter_sf.reuse_best_prediction_for_part_ab = 1;
sf->inter_sf.reuse_compound_type_decision = 1;
sf->inter_sf.txfm_rd_gate_level =
boosted ? 0 : (is_boosted_arf2_bwd_type ? 1 : 2);
@@ -1130,6 +1131,7 @@
inter_sf->txfm_rd_gate_level = 0;
inter_sf->prune_inter_modes_if_skippable = 0;
inter_sf->disable_masked_comp = 0;
+ inter_sf->reuse_best_prediction_for_part_ab = 0;
}
static AOM_INLINE void init_interp_sf(INTERP_FILTER_SPEED_FEATURES *interp_sf) {
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index a28200a..42330af 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -736,6 +736,10 @@
// Enable/disable masked compound.
int disable_masked_comp;
+
+ // Reuse the best prediction modes found in PARTITION_SPLIT and PARTITION_RECT
+ // when encoding PARTITION_AB.
+ int reuse_best_prediction_for_part_ab;
} INTER_MODE_SPEED_FEATURES;
typedef struct INTERP_FILTER_SPEED_FEATURES {