Allow for re-encoding frame if high overshoot.

For 1-pass CBR mode with screen content mode enabled:
if pre-analysis (source temporal SAD) indicates a significant
change in content, then check the projected frame size after
encode_frame(), and if the size is above a threshold, force a
re-encode of that frame at max QP.

Change-Id: I91e66d9f3167aff2ffcc6f16f47f19f1c21dc688
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index c67cb32..84aeeb3 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -1623,6 +1623,8 @@
   cpi->resize_buffer_underflow = 0;
   cpi->common.buffer_pool = pool;
 
+  cpi->rc.high_source_sad = 0;
+
   init_config(cpi, oxcf);
   vp9_rc_init(&cpi->oxcf, oxcf->pass, &cpi->rc);
 
@@ -3180,7 +3182,9 @@
   set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME);
 }
 
-static void encode_without_recode_loop(VP9_COMP *cpi) {
+static void encode_without_recode_loop(VP9_COMP *cpi,
+                                       size_t *size,
+                                       uint8_t *dest) {
   VP9_COMMON *const cm = &cpi->common;
   int q = 0, bottom_index = 0, top_index = 0;  // Dummy variables.
 
@@ -3210,6 +3214,13 @@
                                                &cpi->scaled_last_source);
   }
 
+  if (cpi->oxcf.pass == 0 &&
+      cpi->oxcf.rc_mode == VPX_CBR &&
+      cpi->resize_state == 0 &&
+      cm->frame_type != KEY_FRAME &&
+      cpi->oxcf.content == VP9E_CONTENT_SCREEN)
+    vp9_avg_source_sad(cpi);
+
   if (frame_is_intra_only(cm) == 0) {
     vp9_scale_references(cpi);
   }
@@ -3237,6 +3248,38 @@
   // transform / motion compensation build reconstruction frame
   vp9_encode_frame(cpi);
 
+  // Check if we should re-encode this frame because of high overshoot.
+  // Only for frames where high temporal-source sad is detected.
+  if (cpi->oxcf.pass == 0 &&
+      cpi->oxcf.rc_mode == VPX_CBR &&
+      cpi->resize_state == 0 &&
+      cm->frame_type != KEY_FRAME &&
+      cpi->oxcf.content == VP9E_CONTENT_SCREEN &&
+      cpi->rc.high_source_sad == 1) {
+    int frame_size = 0;
+    // Get an estimate of the encoded frame size.
+    save_coding_context(cpi);
+    vp9_pack_bitstream(cpi, dest, size);
+    restore_coding_context(cpi);
+    frame_size = (int)(*size) << 3;
+    // Check if encoded frame will overshoot too much, and if so, set the q and
+    // adjust some rate control parameters, and return to re-encode the frame.
+    if (vp9_encodedframe_overshoot(cpi, frame_size, &q)) {
+      vpx_clear_system_state();
+      vp9_set_quantizer(cm, q);
+      vp9_set_variance_partition_thresholds(cpi, q);
+      suppress_active_map(cpi);
+      // Turn-off cyclic refresh for re-encoded frame.
+      if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
+        unsigned char *const seg_map = cpi->segmentation_map;
+        memset(seg_map, 0, cm->mi_rows * cm->mi_cols);
+        vp9_disable_segmentation(&cm->seg);
+      }
+      apply_active_map(cpi);
+      vp9_encode_frame(cpi);
+    }
+  }
+
   // Update some stats from cyclic refresh, and check if we should not update
   // golden reference, for non-SVC 1 pass CBR.
   if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
@@ -3736,7 +3779,7 @@
 #endif
 
   if (cpi->sf.recode_loop == DISALLOW_RECODE) {
-    encode_without_recode_loop(cpi);
+    encode_without_recode_loop(cpi, size, dest);
   } else {
     encode_with_recode_loop(cpi, size, dest);
   }
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index a0b3f8a..4ba3406 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -1900,3 +1900,86 @@
   }
   return resize_now;
 }
+
+// Compute average source sad (temporal sad: between current source and
+// previous source) over a subset of superblocks. Use this to detect big changes
+// in content (sets rc->high_source_sad) and allow rate control to react.
+// TODO(marpan): Superblock sad is computed again in variance partition for
+// non-rd mode (but based on last reconstructed frame). Should try to reuse
+// these computations.
+void vp9_avg_source_sad(VP9_COMP *cpi) {
+  VP9_COMMON * const cm = &cpi->common;
+  RATE_CONTROL *const rc = &cpi->rc;
+  rc->high_source_sad = 0;
+  if (cpi->Last_Source != NULL) {
+    const uint8_t *src_y = cpi->Source->y_buffer;
+    const int src_ystride = cpi->Source->y_stride;
+    const uint8_t *last_src_y = cpi->Last_Source->y_buffer;
+    const int last_src_ystride = cpi->Last_Source->y_stride;
+    int sbi_row, sbi_col;
+    const BLOCK_SIZE bsize = BLOCK_64X64;
+    // Loop over sub-sample of frame, and compute average sad over 64x64 blocks.
+    uint64_t avg_sad = 0;
+    int num_samples = 0;
+    int sb_cols = (cm->mi_cols + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE;
+    int sb_rows = (cm->mi_rows + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE;
+    for (sbi_row = 0; sbi_row < sb_rows; sbi_row ++) {
+      for (sbi_col = 0; sbi_col < sb_cols; sbi_col ++) {
+        // Checker-board pattern (same row/col parity), skip frame boundary.
+        if ((sbi_row > 0 && sbi_col > 0) &&
+            (sbi_row < sb_rows - 1 && sbi_col < sb_cols - 1) &&
+            ((sbi_row % 2 == 0 && sbi_col % 2 == 0) ||
+            (sbi_row % 2 != 0 && sbi_col % 2 != 0))) {
+          num_samples++;
+          avg_sad += cpi->fn_ptr[bsize].sdf(src_y,
+                                            src_ystride,
+                                            last_src_y,
+                                            last_src_ystride);
+        }
+        src_y += 64;  // Step to the next superblock column.
+        last_src_y += 64;
+      }
+      src_y += (src_ystride << 6) - (sb_cols << 6);  // Next superblock row.
+      last_src_y += (last_src_ystride << 6) - (sb_cols << 6);
+    }
+    if (num_samples > 0)
+      avg_sad = avg_sad / num_samples;
+    // Set high_source_sad flag if we detect very high increase in avg_sad
+    // (more than 8x the running average from previous frames). Use a minimum
+    // threshold (4000) for cases where there is small change from content that
+    // is completely static.
+    if (avg_sad > MAX(4000, (rc->avg_source_sad << 3)) &&
+        rc->frames_since_key > 1)
+      rc->high_source_sad = 1;
+    else
+      rc->high_source_sad = 0;
+    rc->avg_source_sad = (rc->avg_source_sad + avg_sad) >> 1;
+  }
+}
+
+// Test if encoded frame (frame_size, in bits) will significantly overshoot the
+// target bitrate; if so, set *q, adjust rate control state, and return 1.
+int vp9_encodedframe_overshoot(VP9_COMP *cpi,
+                               int frame_size,
+                               int *q) {
+  VP9_COMMON * const cm = &cpi->common;
+  RATE_CONTROL *const rc = &cpi->rc;
+  int thresh_qp = 3 * (rc->worst_quality >> 2);  // ~3/4 of worst_quality.
+  int thresh_rate = rc->avg_frame_bandwidth * 10;  // 10x avg_frame_bandwidth.
+  if (cm->base_qindex < thresh_qp &&
+      frame_size > thresh_rate) {
+    // Force a re-encode, and for now use max-QP.
+    *q = cpi->rc.worst_quality;
+    // Adjust avg_frame_qindex and buffer_level, as these parameters will affect
+    // QP selection for subsequent frames. If they have settled down to a very
+    // different (low QP) state, then not re-adjusting them may cause next
+    // frame to select low QP and overshoot again.
+    // TODO(marpan): Check if rate correction factor should also be adjusted.
+    cpi->rc.avg_frame_qindex[INTER_FRAME] = *q;
+    rc->buffer_level = rc->optimal_buffer_level;
+    rc->bits_off_target = rc->optimal_buffer_level;
+    return 1;
+  } else {
+    return 0;
+  }
+}
diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h
index f9b0488..11dfa35 100644
--- a/vp9/encoder/vp9_ratectrl.h
+++ b/vp9/encoder/vp9_ratectrl.h
@@ -142,6 +142,9 @@
   int frame_width[FRAME_SCALE_STEPS];
   int frame_height[FRAME_SCALE_STEPS];
   int rf_level_maxq[RATE_FACTOR_LEVELS];
+
+  uint64_t avg_source_sad;
+  int high_source_sad;
 } RATE_CONTROL;
 
 struct VP9_COMP;
@@ -256,6 +259,10 @@
 
 int vp9_resize_one_pass_cbr(struct VP9_COMP *cpi);
 
+void vp9_avg_source_sad(struct VP9_COMP *cpi);
+
+int vp9_encodedframe_overshoot(struct VP9_COMP *cpi, int frame_size, int *q);
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif