Fix loop filter errors

Previously, loop filter mask allocation was broken in the decoder in
some cases where superres is used: context buffer allocation is not
called in those cases, which left the loop filter masks uninitialized.

This CL allocates the loop filter masks together with mi, in a path that
is called when superres introduces a frame size change.

BUG=aomedia:1906

Also, this CL disables the multi-threaded loop filter in the filter level
search, because the loop filter mask does not support multi-threading yet.
This should fix the nightly run failure:

BUG=aomedia:1913

Change-Id: I9c226b96f195f33f36849d4e55ef24b33b2316fd
diff --git a/av1/common/alloccommon.c b/av1/common/alloccommon.c
index b4fff4d..0e23ca2 100644
--- a/av1/common/alloccommon.c
+++ b/av1/common/alloccommon.c
@@ -31,6 +31,38 @@
   return mb_rows * mb_cols;
 }
 
+#if LOOP_FILTER_BITMASK
+// (Re)allocates cm->lf.lfm, zero-filled, for the current mi_rows/mi_cols.
+// Returns 0 on success, 1 on allocation failure (sizes are then reset to 0).
+static int alloc_loop_filter_mask(AV1_COMMON *cm) {
+  aom_free(cm->lf.lfm);
+  // Each lfm holds bit masks for all the 4x4 blocks in a max
+  // 64x64 (128x128 for ext_partitions) region.  The stride
+  // and rows are rounded up / truncated to a multiple of 16
+  // (32 for ext_partition).
+  cm->lf.lfm_stride = (cm->mi_cols + (MI_SIZE_64X64 - 1)) >> MIN_MIB_SIZE_LOG2;
+  cm->lf.lfm_num = ((cm->mi_rows + (MI_SIZE_64X64 - 1)) >> MIN_MIB_SIZE_LOG2) *
+                   cm->lf.lfm_stride;
+  // aom_calloc() returns zeroed memory, so no per-entry av1_zero() is needed.
+  cm->lf.lfm =
+      (LoopFilterMask *)aom_calloc(cm->lf.lfm_num, sizeof(*cm->lf.lfm));
+  if (cm->lf.lfm) return 0;
+  // Allocation failed: keep the counters consistent with the NULL array.
+  cm->lf.lfm_num = 0;
+  cm->lf.lfm_stride = 0;
+  return 1;
+}
+
+// Releases the loop filter mask array and clears its size bookkeeping.
+static void free_loop_filter_mask(AV1_COMMON *cm) {
+  // No NULL guard: the counters are reset even when nothing was allocated.
+  aom_free(cm->lf.lfm);
+  cm->lf.lfm = NULL;
+  cm->lf.lfm_num = 0;
+  cm->lf.lfm_stride = 0;
+}
+#endif
+
 void av1_set_mb_mi(AV1_COMMON *cm, int width, int height) {
   // Ensure that the decoded width and height are both multiples of
   // 8 luma pixels (note: this may only be a multiple of 4 chroma pixels if
@@ -47,6 +79,10 @@
   cm->mb_cols = (cm->mi_cols + 2) >> 2;
   cm->mb_rows = (cm->mi_rows + 2) >> 2;
   cm->MBs = cm->mb_rows * cm->mb_cols;
+
+#if LOOP_FILTER_BITMASK
+  alloc_loop_filter_mask(cm);
+#endif
 }
 
 void av1_free_ref_frame_buffers(BufferPool *pool) {
@@ -142,29 +178,6 @@
   aom_free_frame_buffer(&cm->rst_frame);
 }
 
-#if LOOP_FILTER_BITMASK
-static int alloc_loop_filter(AV1_COMMON *cm) {
-  aom_free(cm->lf.lfm);
-  cm->lf.lfm = NULL;
-  if (cm->coded_lossless) return 0;
-  // Each lfm holds bit masks for all the 4x4 blocks in a max
-  // 64x64 (128x128 for ext_partitions) region.  The stride
-  // and rows are rounded up / truncated to a multiple of 16
-  // (32 for ext_partition).
-  cm->lf.lfm_stride = (cm->mi_cols + (MI_SIZE_64X64 - 1)) >> MIN_MIB_SIZE_LOG2;
-  cm->lf.lfm_num = ((cm->mi_rows + (MI_SIZE_64X64 - 1)) >> MIN_MIB_SIZE_LOG2) *
-                   cm->lf.lfm_stride;
-  cm->lf.lfm =
-      (LoopFilterMask *)aom_calloc(cm->lf.lfm_num, sizeof(*cm->lf.lfm));
-  if (!cm->lf.lfm) return 1;
-
-  unsigned int i;
-  for (i = 0; i < cm->lf.lfm_num; ++i) av1_zero(cm->lf.lfm[i]);
-
-  return 0;
-}
-#endif  // LOOP_FILTER_BITMASK
-
 void av1_free_above_context_buffers(AV1_COMMON *cm,
                                     int num_free_above_contexts) {
   int i;
@@ -202,11 +215,8 @@
   av1_free_above_context_buffers(cm, cm->num_allocated_above_contexts);
 
 #if LOOP_FILTER_BITMASK
-  aom_free(cm->lf.lfm);
-  cm->lf.lfm = NULL;
-  cm->lf.lfm_num = 0;
-  cm->lf.lfm_stride = 0;
-#endif  // LOOP_FILTER_BITMASK
+  free_loop_filter_mask(cm);
+#endif
 }
 
 int av1_alloc_above_context_buffers(AV1_COMMON *cm,
@@ -263,10 +273,6 @@
     if (cm->alloc_mi(cm, new_mi_size)) goto fail;
   }
 
-#if LOOP_FILTER_BITMASK
-  if (alloc_loop_filter(cm)) goto fail;
-#endif  // LOOP_FILTER_BITMASK
-
   return 0;
 
 fail:
diff --git a/av1/common/enums.h b/av1/common/enums.h
index 47259c2..445d30c 100644
--- a/av1/common/enums.h
+++ b/av1/common/enums.h
@@ -94,7 +94,7 @@
 // TODO(chengchen): Temporal flag serve as experimental flag for WIP
 // bitmask construction.
 // Shall be removed when bitmask code is completely checkedin
-#define LOOP_FILTER_BITMASK 0
+#define LOOP_FILTER_BITMASK 1
 
 #define PROFILE_BITS 3
 // The following three profiles are currently defined.
diff --git a/av1/encoder/picklpf.c b/av1/encoder/picklpf.c
index 2285d34..66f42fa 100644
--- a/av1/encoder/picklpf.c
+++ b/av1/encoder/picklpf.c
@@ -67,6 +67,12 @@
     case 2: cm->lf.filter_level_v = filter_level[0]; break;
   }
 
+      // TODO(any): please enable multi-thread and remove the flag when loop
+      // filter mask is compatible with multi-thread.
+#if LOOP_FILTER_BITMASK
+  av1_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, plane,
+                        plane + 1, partial_frame);
+#else
   if (cpi->num_workers > 1)
     av1_loop_filter_frame_mt(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, plane,
                              plane + 1, partial_frame, cpi->workers,
@@ -74,6 +80,7 @@
   else
     av1_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, plane,
                           plane + 1, partial_frame);
+#endif
 
   int highbd = 0;
   highbd = cm->use_highbitdepth;