[CFL] Independent search termination for plane and sign

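Stop the alpha search for a given plane and sign once fewer than half of
the alpha values tried so far give an improvement. The check only applies
after the first three alpha values have been tried.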

This gives a 2.5x speedup of the alpha search at the cost of minor metric
changes.

Results on subset1:
   PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
-0.0038 |  0.0466 |  0.1388 |  -0.0103 | -0.0312 | -0.0220 |     0.0330

Change-Id: Ic25a995eee500ffc4b80b73635baf0a710954dc0
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index c0bcf17..78525e5 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -5373,8 +5373,11 @@
 
   for (int plane = 0; plane < CFL_PRED_PLANES; plane++) {
     for (int pn_sign = CFL_SIGN_NEG; pn_sign < CFL_SIGNS; pn_sign++) {
+      int progress = 0;
       for (int c = 0; c < CFL_ALPHABET_SIZE; c++) {
+        int flag = 0;
         RD_STATS rd_stats;
+        if (c > 2 && progress < c) break;
         av1_init_rd_stats(&rd_stats);
         for (int i = 0; i < CFL_SIGNS; i++) {
           const int joint_sign = PLANE_SIGN_TO_JOINT_SIGN(plane, pn_sign, i);
@@ -5394,12 +5397,14 @@
 #if CONFIG_DEBUG
           best_rate_uv[joint_sign][plane] = rd_stats.rate;
 #endif  // CONFIG_DEBUG
+          flag = 2;
           if (best_rd_uv[joint_sign][!plane] == INT64_MAX) continue;
           this_rd += mode_rd + best_rd_uv[joint_sign][!plane];
           if (this_rd >= best_rd) continue;
           best_rd = this_rd;
           best_joint_sign = joint_sign;
         }
+        progress += flag;
       }
     }
   }