RT: Pick cdef filter strengths based on q for screen content

Train a new model to predict cdef filter strengths for screen
content based on the frame q.

Add a qindex threshold in the speed feature to achieve a better
tradeoff for different speed settings.

Performance:
speed_10    ovr_psnr    avg_psnr    ssim      vmaf
rtc_screen   -3.485%     -1.362%    -4.240%   3.724%

The average slowdown is 2.4%.

For speed 9, a lower threshold (20 instead of 80) is used to retain higher coding gain.

Change-Id: Icf0690429e0739b9c72be90630cb6f4cdb7a670c
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 32d0496..2542076 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -2144,11 +2144,16 @@
     start_timing(cpi, cdef_time);
 #endif
     const int num_workers = cpi->mt_info.num_mod_workers[MOD_CDEF];
+    const int use_screen_content_model =
+        cm->quant_params.base_qindex >
+            AOMMAX(cpi->sf.rt_sf.screen_content_cdef_filter_qindex_thresh,
+                   cpi->rc.best_quality + 5) &&
+        cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN;
     // Find CDEF parameters
     av1_cdef_search(&cpi->mt_info, &cm->cur_frame->buf, cpi->source, cm, xd,
                     cpi->sf.lpf_sf.cdef_pick_method, cpi->td.mb.rdmult,
                     cpi->sf.rt_sf.skip_cdef_sb, cpi->rc.frames_since_key,
-                    cpi->oxcf.tool_cfg.cdef_control,
+                    cpi->oxcf.tool_cfg.cdef_control, use_screen_content_model,
                     cpi->svc.non_reference_frame);
 
     // Apply the filter
diff --git a/av1/encoder/pickcdef.c b/av1/encoder/pickcdef.c
index 557c9ee..adbb07c 100644
--- a/av1/encoder/pickcdef.c
+++ b/av1/encoder/pickcdef.c
@@ -508,7 +508,7 @@
 }
 
 static void pick_cdef_from_qp(AV1_COMMON *const cm, int skip_cdef,
-                              int frames_since_key) {
+                              int frames_since_key, int is_screen_content) {
   const int bd = cm->seq_params->bit_depth;
   const int q =
       av1_ac_quant_QTX(cm->quant_params.base_qindex, 0, bd) >> (bd - 8);
@@ -527,32 +527,47 @@
   int predicted_y_f2 = 0;
   int predicted_uv_f1 = 0;
   int predicted_uv_f2 = 0;
-  if (!frame_is_intra_only(cm)) {
-    predicted_y_f1 = clamp((int)roundf(q * q * -0.0000023593946f +
-                                       q * 0.0068615186f + 0.02709886f),
-                           0, 15);
-    predicted_y_f2 = clamp((int)roundf(q * q * -0.00000057629734f +
-                                       q * 0.0013993345f + 0.03831067f),
-                           0, 3);
-    predicted_uv_f1 = clamp((int)roundf(q * q * -0.0000007095069f +
-                                        q * 0.0034628846f + 0.00887099f),
-                            0, 15);
-    predicted_uv_f2 = clamp((int)roundf(q * q * 0.00000023874085f +
-                                        q * 0.00028223585f + 0.05576307f),
-                            0, 3);
+  if (is_screen_content) {
+    predicted_y_f1 =
+        (int)(5.88217781e-06 * q * q + 6.10391455e-03 * q + 9.95043102e-02);
+    predicted_y_f2 =
+        (int)(-7.79934857e-06 * q * q + 6.58957830e-03 * q + 8.81045025e-01);
+    predicted_uv_f1 =
+        (int)(-6.79500136e-06 * q * q + 1.02695586e-02 * q + 1.36126802e-01);
+    predicted_uv_f2 =
+        (int)(-9.99613695e-08 * q * q - 1.79361339e-05 * q + 1.17022324e+0);
+    predicted_y_f1 = clamp(predicted_y_f1, 0, 15);
+    predicted_y_f2 = clamp(predicted_y_f2, 0, 3);
+    predicted_uv_f1 = clamp(predicted_uv_f1, 0, 15);
+    predicted_uv_f2 = clamp(predicted_uv_f2, 0, 3);
   } else {
-    predicted_y_f1 = clamp(
-        (int)roundf(q * q * 0.0000033731974f + q * 0.008070594f + 0.0187634f),
-        0, 15);
-    predicted_y_f2 = clamp(
-        (int)roundf(q * q * 0.0000029167343f + q * 0.0027798624f + 0.0079405f),
-        0, 3);
-    predicted_uv_f1 = clamp(
-        (int)roundf(q * q * -0.0000130790995f + q * 0.012892405f - 0.00748388f),
-        0, 15);
-    predicted_uv_f2 = clamp((int)roundf(q * q * 0.0000032651783f +
-                                        q * 0.00035520183f + 0.00228092f),
-                            0, 3);
+    if (!frame_is_intra_only(cm)) {
+      predicted_y_f1 = clamp((int)roundf(q * q * -0.0000023593946f +
+                                         q * 0.0068615186f + 0.02709886f),
+                             0, 15);
+      predicted_y_f2 = clamp((int)roundf(q * q * -0.00000057629734f +
+                                         q * 0.0013993345f + 0.03831067f),
+                             0, 3);
+      predicted_uv_f1 = clamp((int)roundf(q * q * -0.0000007095069f +
+                                          q * 0.0034628846f + 0.00887099f),
+                              0, 15);
+      predicted_uv_f2 = clamp((int)roundf(q * q * 0.00000023874085f +
+                                          q * 0.00028223585f + 0.05576307f),
+                              0, 3);
+    } else {
+      predicted_y_f1 = clamp(
+          (int)roundf(q * q * 0.0000033731974f + q * 0.008070594f + 0.0187634f),
+          0, 15);
+      predicted_y_f2 = clamp((int)roundf(q * q * 0.0000029167343f +
+                                         q * 0.0027798624f + 0.0079405f),
+                             0, 3);
+      predicted_uv_f1 = clamp((int)roundf(q * q * -0.0000130790995f +
+                                          q * 0.012892405f - 0.00748388f),
+                              0, 15);
+      predicted_uv_f2 = clamp((int)roundf(q * q * 0.0000032651783f +
+                                          q * 0.00035520183f + 0.00228092f),
+                              0, 3);
+    }
   }
   cdef_info->cdef_strengths[0] =
       predicted_y_f1 * CDEF_SEC_STRENGTHS + predicted_y_f2;
@@ -584,7 +599,8 @@
                      const YV12_BUFFER_CONFIG *ref, AV1_COMMON *cm,
                      MACROBLOCKD *xd, CDEF_PICK_METHOD pick_method, int rdmult,
                      int skip_cdef_feature, int frames_since_key,
-                     CDEF_CONTROL cdef_control, int non_reference_frame) {
+                     CDEF_CONTROL cdef_control, const int is_screen_content,
+                     int non_reference_frame) {
   assert(cdef_control != CDEF_NONE);
   if (cdef_control == CDEF_REFERENCE && non_reference_frame) {
     CdefInfo *const cdef_info = &cm->cdef_info;
@@ -596,7 +612,8 @@
   }
 
   if (pick_method == CDEF_PICK_FROM_Q) {
-    pick_cdef_from_qp(cm, skip_cdef_feature, frames_since_key);
+    pick_cdef_from_qp(cm, skip_cdef_feature, frames_since_key,
+                      is_screen_content);
     return;
   }
   const CommonModeInfoParams *const mi_params = &cm->mi_params;
diff --git a/av1/encoder/pickcdef.h b/av1/encoder/pickcdef.h
index d52cb4b..e070a8a 100644
--- a/av1/encoder/pickcdef.h
+++ b/av1/encoder/pickcdef.h
@@ -228,6 +228,7 @@
  * \param[in]      skip_cdef_feature Speed feature to skip cdef
  * \param[in]      frames_since_key Number of frames since key frame
  * \param[in]      cdef_control  Parameter that controls CDEF application
+ * \param[in]      is_screen_content   Whether to use screen content model
  * \param[in]      non_reference_frame Indicates if current frame is
  * non-reference
  *
@@ -247,7 +248,8 @@
                      const YV12_BUFFER_CONFIG *ref, AV1_COMMON *cm,
                      MACROBLOCKD *xd, CDEF_PICK_METHOD pick_method, int rdmult,
                      int skip_cdef_feature, int frames_since_key,
-                     CDEF_CONTROL cdef_control, int non_reference_frame);
+                     CDEF_CONTROL cdef_control, const int is_screen_content,
+                     int non_reference_frame);
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 3b0e151..270f891 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -1666,6 +1666,7 @@
   if (speed >= 9) {
     sf->rt_sf.sse_early_term_inter_search = EARLY_TERM_IDX_3;
     sf->lpf_sf.cdef_pick_method = CDEF_PICK_FROM_Q;
+    sf->rt_sf.screen_content_cdef_filter_qindex_thresh = 20;
     sf->rt_sf.estimate_motion_for_var_based_partition = 0;
     sf->rt_sf.force_large_partition_blocks = 1;
     sf->rt_sf.skip_intra_pred = 2;
@@ -1682,6 +1683,7 @@
     sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
     sf->rt_sf.force_half_pel_block = 1;
     sf->rt_sf.reduce_zeromv_mvres = true;
+    sf->rt_sf.screen_content_cdef_filter_qindex_thresh = 80;
   }
 }
 
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index bbf17d5..8bb428f 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -1531,6 +1531,13 @@
   // by 8.44% for speed 9 on a typical image dataset with coding performance
   // gain of 0.78%.
   bool vbp_prune_16x16_split_using_min_max_sub_blk_var;
+
+  // A qindex threshold for the qindex based CDEF filter strength model
+  // for screen content: the model is used only when base_qindex exceeds
+  // it. This gives a substantial gain on coding metrics, with a moderate
+  // increase in encoding time. Setting it to zero does not turn the
+  // model off; a higher threshold restricts it to higher qindex frames.
+  int screen_content_cdef_filter_qindex_thresh;
 } REAL_TIME_SPEED_FEATURES;
 
 /*!\endcond */