Add check for trailing bits at end of tile group

The decoder now checks for trailing bits at the end of each tile group,
except when "large scale tile" is enabled.

Fix the encoder so that the trailing bit is written into the correct byte.

Modify the arithmetic coder termination to produce a number of bits
consistent with the value returned by aom_daala_stop_encode().

BUG=aomedia:1620

Change-Id: Ic52215a3bfdd367241790f9ab7cfb1cee54f18eb
diff --git a/aom_dsp/entenc.c b/aom_dsp/entenc.c
index 8ecb0ce..2525eb4 100644
--- a/aom_dsp/entenc.c
+++ b/aom_dsp/entenc.c
@@ -309,8 +309,13 @@
   l = enc->low;
   r = enc->rng;
   c = enc->cnt;
+#if CONFIG_TRAILING_BITS
+  s = 10;
+  m = 0x3FFF;
+#else
   s = 9;
   m = 0x7FFF;
+#endif
   e = (l + m) & ~m;
   while ((e | m) >= l + r) {
     s++;
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index b6684a2..14f03f2 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -1811,7 +1811,11 @@
 
 static const uint8_t *decode_tiles(AV1Decoder *pbi, const uint8_t *data,
                                    const uint8_t *data_end, int startTile,
+#if CONFIG_TRAILING_BITS
+                                   int endTile, uint32_t *last_bit_pos) {
+#else
                                    int endTile) {
+#endif
   AV1_COMMON *const cm = &pbi->common;
   const int num_planes = av1_num_planes(cm);
   const int tile_cols = cm->tile_cols;
@@ -1981,6 +1985,9 @@
   }
 
   TileData *const td = pbi->tile_data + endTile;
+#if CONFIG_TRAILING_BITS
+  *last_bit_pos = aom_reader_tell(&td->bit_reader) % 8;
+#endif
 
   return aom_reader_find_end(&td->bit_reader);
 }
@@ -3313,13 +3320,23 @@
 void av1_decode_tg_tiles_and_wrapup(AV1Decoder *pbi, const uint8_t *data,
                                     const uint8_t *data_end,
                                     const uint8_t **p_data_end, int startTile,
+#if CONFIG_TRAILING_BITS
+                                    int endTile, int initialize_flag,
+                                    uint32_t *last_bit_pos) {
+#else
                                     int endTile, int initialize_flag) {
+#endif
   AV1_COMMON *const cm = &pbi->common;
   MACROBLOCKD *const xd = &pbi->mb;
 
   if (initialize_flag) setup_frame_info(pbi);
 
+#if CONFIG_TRAILING_BITS
+  *p_data_end =
+      decode_tiles(pbi, data, data_end, startTile, endTile, last_bit_pos);
+#else
   *p_data_end = decode_tiles(pbi, data, data_end, startTile, endTile);
+#endif
 
   const int num_planes = av1_num_planes(cm);
   // If the bit stream is monochrome, set the U and V buffers to a constant.
diff --git a/av1/decoder/decodeframe.h b/av1/decoder/decodeframe.h
index a849c71..5e8a4dc 100644
--- a/av1/decoder/decodeframe.h
+++ b/av1/decoder/decodeframe.h
@@ -47,7 +47,12 @@
 void av1_decode_tg_tiles_and_wrapup(struct AV1Decoder *pbi, const uint8_t *data,
                                     const uint8_t *data_end,
                                     const uint8_t **p_data_end, int startTile,
+#if CONFIG_TRAILING_BITS
+                                    int endTile, int initialize_flag,
+                                    uint32_t *last_bit_pos);
+#else
                                     int endTile, int initialize_flag);
+#endif
 
 void av1_read_bitdepth_colorspace_sampling(AV1_COMMON *cm,
                                            struct aom_read_bit_buffer *rb,
diff --git a/av1/decoder/obu.c b/av1/decoder/obu.c
index 9205526..1bb9d66 100644
--- a/av1/decoder/obu.c
+++ b/av1/decoder/obu.c
@@ -263,14 +263,30 @@
   AV1_COMMON *const cm = &pbi->common;
   int startTile, endTile;
   uint32_t header_size, tg_payload_size;
+#if CONFIG_TRAILING_BITS
+  uint32_t last_bit_pos;
+#endif
 
   header_size = read_tile_group_header(pbi, rb, &startTile, &endTile);
   if (startTile > endTile) return header_size;
   data += header_size;
   av1_decode_tg_tiles_and_wrapup(pbi, data, data_end, p_data_end, startTile,
+#if CONFIG_TRAILING_BITS
+                                 endTile, is_first_tg, &last_bit_pos);
+#else
                                  endTile, is_first_tg);
+#endif
+
   tg_payload_size = (uint32_t)(*p_data_end - data);
 
+#if CONFIG_TRAILING_BITS
+  if (!pbi->common.large_scale_tile) {
+    av1_init_read_bit_buffer(pbi, rb, *p_data_end - 1, data_end);
+    rb->bit_offset = last_bit_pos;
+    av1_check_trailing_bits(pbi, rb);
+  }
+#endif
+
   // TODO(shan):  For now, assume all tile groups received in order
   *is_last_tg = endTile == cm->tile_rows * cm->tile_cols - 1;
   return header_size + tg_payload_size;
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 9442fc9..c65b613 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -3810,7 +3810,7 @@
           // arithmetic encoder left several 0 bits
           // changing the first 0 bit to 1
           int bit_offset = 7 - nb_bits % 8;
-          *(dst + total_size + tile_size) |= 1 << bit_offset;
+          *(dst + total_size + tile_size - 1) |= 1 << bit_offset;
         }
       }
 #endif