Loopfilter: Fix chroma edge selection

A 32x32 transform should have no internal filtering (check c==4)

Change-Id: I7414cf4748ed053208217692ef00cd8b20d49a91
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
index 49705ff..94bf2af 100644
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -568,7 +568,7 @@
                                     unsigned int mask_16x16,
                                     unsigned int mask_8x8,
                                     unsigned int mask_4x4,
-                                    unsigned int mask_4x4_1,
+                                    unsigned int mask_4x4_int,
                                     const struct loop_filter_info *lfi) {
   unsigned int mask;
 
@@ -579,7 +579,7 @@
                                    lfi->hev_thr, 1);
         assert(!(mask_8x8 & 1));
         assert(!(mask_4x4 & 1));
-        assert(!(mask_4x4_1 & 1));
+        assert(!(mask_4x4_int & 1));
       } else if (mask_8x8 & 1) {
         vp9_mbloop_filter_vertical_edge(s, pitch, lfi->mblim, lfi->lim,
                                         lfi->hev_thr, 1);
@@ -594,7 +594,7 @@
         assert(0);
       }
 
-      if (mask_4x4_1 & 1)
+      if (mask_4x4_int & 1)
         vp9_loop_filter_vertical_edge(s + 4, pitch, lfi->mblim, lfi->lim,
                                       lfi->hev_thr, 1);
     }
@@ -603,7 +603,7 @@
     mask_16x16 >>= 1;
     mask_8x8 >>= 1;
     mask_4x4 >>= 1;
-    mask_4x4_1 >>= 1;
+    mask_4x4_int >>= 1;
   }
 }
 
@@ -611,7 +611,7 @@
                                      unsigned int mask_16x16,
                                      unsigned int mask_8x8,
                                      unsigned int mask_4x4,
-                                     unsigned int mask_4x4_1,
+                                     unsigned int mask_4x4_int,
                                      int only_4x4_1,
                                      const struct loop_filter_info *lfi) {
   unsigned int mask;
@@ -624,7 +624,7 @@
                                        lfi->hev_thr, 1);
           assert(!(mask_8x8 & 1));
           assert(!(mask_4x4 & 1));
-          assert(!(mask_4x4_1 & 1));
+          assert(!(mask_4x4_int & 1));
         } else if (mask_8x8 & 1) {
           vp9_mbloop_filter_horizontal_edge(s, pitch, lfi->mblim, lfi->lim,
                                             lfi->hev_thr, 1);
@@ -640,7 +640,7 @@
         }
       }
 
-      if (mask_4x4_1 & 1)
+      if (mask_4x4_int & 1)
         vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
                                         lfi->lim, lfi->hev_thr, 1);
     }
@@ -649,13 +649,14 @@
     mask_16x16 >>= 1;
     mask_8x8 >>= 1;
     mask_4x4 >>= 1;
-    mask_4x4_1 >>= 1;
+    mask_4x4_int >>= 1;
   }
 }
 
 static void filter_block_plane(VP9_COMMON *cm, MACROBLOCKD *xd,
                                int plane, int mi_row, int mi_col) {
   const int ss_x = xd->plane[plane].subsampling_x;
+  const int ss_y = xd->plane[plane].subsampling_y;
   const int row_step = 1 << xd->plane[plane].subsampling_y;
   const int col_step = 1 << xd->plane[plane].subsampling_x;
   struct buf_2d * const dst = &xd->plane[plane].dst;
@@ -664,7 +665,7 @@
   unsigned int mask_16x16[64 / MI_SIZE] = {0};
   unsigned int mask_8x8[64 / MI_SIZE] = {0};
   unsigned int mask_4x4[64 / MI_SIZE] = {0};
-  unsigned int mask_4x4_1[64 / MI_SIZE] = {0};
+  unsigned int mask_4x4_int[64 / MI_SIZE] = {0};
   struct loop_filter_info lfi[64 / MI_SIZE][64 / MI_SIZE];
   int r, c;
 
@@ -689,6 +690,7 @@
       const int skip_this_r = skip_this && !block_edge_above;
       const TX_SIZE tx_size = plane ? get_uv_tx_size(&mi[c].mbmi)
                                     : mi[c].mbmi.txfm_size;
+      const int skip_border_4x4_c = ss_x && mi_col + c == cm->mi_cols - 1;
 
       // Filter level can vary per MI
       if (!build_lfi(cm, &mi[c].mbmi,
@@ -697,33 +699,33 @@
 
       // Build masks based on the transform size of each block
       if (tx_size == TX_32X32) {
-        if (!skip_this_c && (c & 3) == 0)
+        if (!skip_this_c && ((c >> ss_x) & 3) == 0)
           mask_16x16_c |= 1 << (c >> ss_x);
-        if (!skip_this_r && (r & 3) == 0)
+        if (!skip_this_r && ((r >> ss_y) & 3) == 0)
           mask_16x16[r] |= 1 << (c >> ss_x);
       } else if (tx_size == TX_16X16) {
-        if (!skip_this_c && (c & 1) == 0)
+        if (!skip_this_c && ((c >> ss_x) & 1) == 0)
           mask_16x16_c |= 1 << (c >> ss_x);
-        if (!skip_this_r && (r & 1) == 0)
+        if (!skip_this_r && ((r >> ss_y) & 1) == 0)
           mask_16x16[r] |= 1 << (c >> ss_x);
       } else {
         // force 8x8 filtering on 32x32 boundaries
         if (!skip_this_c) {
-          if (tx_size == TX_8X8 || (c & 3) == 0)
+          if (tx_size == TX_8X8 || ((c >> ss_x) & 3) == 0)
             mask_8x8_c |= 1 << (c >> ss_x);
           else
             mask_4x4_c |= 1 << (c >> ss_x);
         }
 
         if (!skip_this_r) {
-          if (tx_size == TX_8X8 || (r & 3) == 0)
+          if (tx_size == TX_8X8 || ((r >> ss_y) & 3) == 0)
             mask_8x8[r] |= 1 << (c >> ss_x);
           else
             mask_4x4[r] |= 1 << (c >> ss_x);
         }
 
-        if (!skip_this && tx_size < TX_8X8)
-          mask_4x4_1[r] |= 1 << (c >> ss_x);
+        if (!skip_this && tx_size < TX_8X8 && !skip_border_4x4_c)
+          mask_4x4_int[r] |= 1 << (c >> ss_x);
       }
     }
 
@@ -733,7 +735,7 @@
                             mask_16x16_c & border_mask,
                             mask_8x8_c & border_mask,
                             mask_4x4_c & border_mask,
-                            mask_4x4_1[r], lfi[r]);
+                            mask_4x4_int[r], lfi[r]);
     dst->buf += 8 * dst->stride;
     xd->mode_info_context += cm->mode_info_stride * row_step;
   }
@@ -742,11 +744,14 @@
   dst->buf = dst0;
   xd->mode_info_context = mi0;
   for (r = 0; r < 64 / MI_SIZE && mi_row + r < cm->mi_rows; r += row_step) {
+    const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
+    const unsigned int mask_4x4_int_r = skip_border_4x4_r ? 0 : mask_4x4_int[r];
+
     filter_selectively_horiz(dst->buf, dst->stride,
                              mask_16x16[r],
                              mask_8x8[r],
                              mask_4x4[r],
-                             mask_4x4_1[r], mi_row + r == 0, lfi[r]);
+                             mask_4x4_int_r, mi_row + r == 0, lfi[r]);
     dst->buf += 8 * dst->stride;
     xd->mode_info_context += cm->mode_info_stride * row_step;
   }