Fix a bug related to tile based decoding

When the sample size is larger than 1, the calculation of the image's
height and width should round up, not down.

Change-Id: I6c2ad1f630d1f8f9392594887e23f294ecde2352
diff --git a/jdapistd.c b/jdapistd.c
index 42060c4..8d6e591 100644
--- a/jdapistd.c
+++ b/jdapistd.c
@@ -204,7 +204,8 @@
  * Initialize the jpeg decoder to decompressing a rectangle with size of (width, height)
  * and its upper-left corner located at (start_x, start_y).
  * Align start_x and start_y to multiplies of iMCU width and height, respectively.
- * Also, the new reader position will be returned in (start_x, start_y).
+ * Also, the new reader position and sampled image size will be returned in
+ * (start_x, start_y) and (width, height), respectively.
  */
 
 GLOBAL(void)
@@ -217,15 +218,15 @@
   int row_offset = *start_y / lines_per_iMCU_row;
   int col_left_boundary = ((*start_x / lines_per_iMCU_col)
             / index->MCU_sample_size) * index->MCU_sample_size;
-  int col_right_boundary = (*start_x + *width + lines_per_iMCU_col - 1)
-            / lines_per_iMCU_col;
+  int col_right_boundary =
+                  jdiv_round_up(*start_x + *width, lines_per_iMCU_col);
 
   *height = (*start_y - row_offset * lines_per_iMCU_row) + *height;
   *start_x = col_left_boundary * lines_per_iMCU_col;
   *start_y = row_offset * lines_per_iMCU_row;
-  cinfo->image_width = jmin(cinfo->original_image_width -
-          col_left_boundary * lines_per_iMCU_col,
-          (col_right_boundary - col_left_boundary) * lines_per_iMCU_col);
+  cinfo->image_width = jmin(cinfo->original_image_width,
+          col_right_boundary * lines_per_iMCU_col) -
+          col_left_boundary * lines_per_iMCU_col;
   cinfo->input_iMCU_row = row_offset;
   cinfo->output_iMCU_row = row_offset;
 
@@ -239,10 +240,11 @@
   else
     jpeg_decompress_per_scan_setup(cinfo);
 
-  int sampleSize = cinfo->image_width / cinfo->output_width;
-  *height /= sampleSize;
+  int sample_size = DCTSIZE / cinfo->min_DCT_scaled_size;
+
+  *height = jdiv_round_up(*height, sample_size);
   *width = cinfo->output_width;
-  cinfo->output_scanline = lines_per_iMCU_row * row_offset / sampleSize;
+  cinfo->output_scanline = lines_per_iMCU_row * row_offset / sample_size;
   cinfo->inputctl->consume_input = cinfo->coef->consume_data;
   cinfo->inputctl->consume_input_build_huffman_index =
       cinfo->coef->consume_data_build_huffman_index;
@@ -265,24 +267,23 @@
 
 GLOBAL(JDIMENSION)
 jpeg_read_tile_scanline (j_decompress_ptr cinfo, huffman_index *index,
-        JSAMPARRAY scanlines, int start_x, int start_y, int width, int height)
+        JSAMPARRAY scanlines)
 {
   // Calculates the boundary of iMCU
   int lines_per_iMCU_row = cinfo->max_v_samp_factor * DCTSIZE;
   int lines_per_iMCU_col = cinfo->max_h_samp_factor * DCTSIZE;
-  int col_left_boundary = ((start_x / lines_per_iMCU_col)
-          / index->MCU_sample_size) * index->MCU_sample_size;
-  int sampleSize = cinfo->image_width / cinfo->output_width;
-  int row_ctr = 0;
+  int sample_size = DCTSIZE / cinfo->min_DCT_scaled_size;
+  JDIMENSION row_ctr = 0;
 
   if (cinfo->progressive_mode) {
     (*cinfo->main->process_data) (cinfo, scanlines, &row_ctr, 1);
   } else {
-    if (cinfo->output_scanline % (lines_per_iMCU_row / sampleSize) == 0) {
+    if (cinfo->output_scanline % (lines_per_iMCU_row / sample_size) == 0) {
       // Set the read head to the next iMCU row
       int iMCU_row_offset = cinfo->output_scanline /
-            (lines_per_iMCU_row / sampleSize);
-      int offset_data_col_position = col_left_boundary / index->MCU_sample_size;
+            (lines_per_iMCU_row / sample_size);
+      int offset_data_col_position = cinfo->coef->MCU_column_left_boundary /
+            index->MCU_sample_size;
       huffman_offset_data offset_data =
           index->scan[0].offset[iMCU_row_offset][offset_data_col_position];
       (*cinfo->entropy->configure_huffman_decoder) (cinfo, offset_data);
diff --git a/jdcoefct.c b/jdcoefct.c
index 7a9f993..b10f9bc 100644
--- a/jdcoefct.c
+++ b/jdcoefct.c
@@ -272,10 +272,9 @@
   unsigned int MCUs_per_row = cinfo->MCUs_per_row;
 #ifdef ANDROID_TILE_BASED_DECODE
   if (cinfo->tile_decode) {
-    MCUs_per_row =
+    MCUs_per_row = jmin(MCUs_per_row,
         (cinfo->coef->column_right_boundary - cinfo->coef->column_left_boundary)
-        * cinfo->entropy->index->MCU_sample_size * cinfo->max_h_samp_factor;
-    MCUs_per_row = jmin(MCUs_per_row, cinfo->MCUs_per_row);
+        * cinfo->entropy->index->MCU_sample_size * cinfo->max_h_samp_factor);
   }
 #endif
 
diff --git a/jdhuff.c b/jdhuff.c
index 9164d17..0d704a5 100644
--- a/jdhuff.c
+++ b/jdhuff.c
@@ -519,6 +519,7 @@
   int blkn, i;
 
   cinfo->restart_interval = 0;
+  cinfo->unread_marker = 0;
 
   unsigned int byte_offset = bitstream_offset >> LOG_TWO_BIT_BUF_SIZE;
   unsigned int bit_in_bit_buffer =
diff --git a/jdmaster.c b/jdmaster.c
index e44d662..e3da758 100644
--- a/jdmaster.c
+++ b/jdmaster.c
@@ -103,9 +103,12 @@
 #endif
 
   /* Prevent application from calling me at wrong times */
-  // FIXME
-  //if (cinfo->global_state != DSTATE_READY)
-  //  ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+#if ANDROID_TILE_BASED_DECODE
+  // Tile based decoding may call this function several times.
+  if (!cinfo->tile_decode)
+#endif
+    if (cinfo->global_state != DSTATE_READY)
+      ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
 
 #ifdef IDCT_SCALING_SUPPORTED
 
diff --git a/jpeglib.h b/jpeglib.h
index 1dccc87..83bed4a 100644
--- a/jpeglib.h
+++ b/jpeglib.h
@@ -1043,9 +1043,7 @@
 					    JDIMENSION max_lines));
 EXTERN(JDIMENSION) jpeg_read_tile_scanline JPP((j_decompress_ptr cinfo,
                         huffman_index *index,
-                        JSAMPARRAY scanlines,
-		                int start_x, int start_y,
-                        int width, int height));
+                        JSAMPARRAY scanlines));
 EXTERN(void) jpeg_init_read_tile_scanline JPP((j_decompress_ptr cinfo,
                         huffman_index *index,
 		                int *start_x, int *start_y,