Do tile-based jpeg decoding for progressive mode.

Change-Id: I5619105ae6a6e2505d17260431bc7a91170eecd6
diff --git a/jdapimin.c b/jdapimin.c
index dca964c..5c9607e 100644
--- a/jdapimin.c
+++ b/jdapimin.c
@@ -53,6 +53,7 @@
     cinfo->client_data = client_data;
   }
   cinfo->is_decompressor = TRUE;
+  cinfo->tile_decode = FALSE;
 
   /* Initialize a memory manager instance for this object */
   jinit_memory_mgr((j_common_ptr) cinfo);
diff --git a/jdapistd.c b/jdapistd.c
index e3c84dd..42060c4 100644
--- a/jdapistd.c
+++ b/jdapistd.c
@@ -82,6 +82,32 @@
   return output_pass_setup(cinfo);
 }
 
+/*
+ * Tile decompression initialization.
+ * jpeg_read_header must be completed before calling this.
+ */
+
+GLOBAL(boolean)
+jpeg_start_tile_decompress (j_decompress_ptr cinfo)
+{
+  if (cinfo->global_state == DSTATE_READY) {
+    /* First call: initialize master control, select active modules */
+    cinfo->tile_decode = TRUE;
+    jinit_master_decompress(cinfo);
+    if (cinfo->buffered_image) {
+      cinfo->global_state = DSTATE_BUFIMAGE;
+      return TRUE;
+    }
+    cinfo->global_state = DSTATE_PRELOAD;
+  }
+  if (cinfo->global_state == DSTATE_PRELOAD) {
+    cinfo->output_scan_number = cinfo->input_scan_number;
+  } else if (cinfo->global_state != DSTATE_PRESCAN)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  /* Perform any dummy output passes, and set up for the final pass */
+  return output_pass_setup(cinfo);
+}
+
 
 /*
  * Set up for an output pass, and perform any dummy pass(es) needed.
@@ -189,14 +215,17 @@
   int lines_per_iMCU_row = cinfo->max_v_samp_factor * DCTSIZE;
   int lines_per_iMCU_col = cinfo->max_h_samp_factor * DCTSIZE;
   int row_offset = *start_y / lines_per_iMCU_row;
-  int col_left_boundary = ((*start_x / lines_per_iMCU_col) / index->MCU_sample_size)
-      * index->MCU_sample_size;
-  int col_right_boundary = (*start_x + *width + lines_per_iMCU_col - 1) / lines_per_iMCU_col;
+  int col_left_boundary = ((*start_x / lines_per_iMCU_col)
+            / index->MCU_sample_size) * index->MCU_sample_size;
+  int col_right_boundary = (*start_x + *width + lines_per_iMCU_col - 1)
+            / lines_per_iMCU_col;
 
   *height = (*start_y - row_offset * lines_per_iMCU_row) + *height;
   *start_x = col_left_boundary * lines_per_iMCU_col;
   *start_y = row_offset * lines_per_iMCU_row;
-  cinfo->image_width = (col_right_boundary - col_left_boundary) * lines_per_iMCU_col;
+  cinfo->image_width = jmin(cinfo->original_image_width -
+          col_left_boundary * lines_per_iMCU_col,
+          (col_right_boundary - col_left_boundary) * lines_per_iMCU_col);
   cinfo->input_iMCU_row = row_offset;
   cinfo->output_iMCU_row = row_offset;
 
@@ -204,14 +233,28 @@
   jinit_color_deconverter(cinfo);
   jpeg_calc_output_dimensions(cinfo);
   jinit_upsampler(cinfo);
-  jpeg_decompress_per_scan_setup(cinfo);
-  cinfo->MCUs_per_row = col_right_boundary - col_left_boundary;
+  (*cinfo->master->prepare_for_output_pass) (cinfo);
+  if (cinfo->progressive_mode)
+    (*cinfo->entropy->start_pass) (cinfo);
+  else
+    jpeg_decompress_per_scan_setup(cinfo);
 
   int sampleSize = cinfo->image_width / cinfo->output_width;
   *height /= sampleSize;
   *width = cinfo->output_width;
   cinfo->output_scanline = lines_per_iMCU_row * row_offset / sampleSize;
-  (*cinfo->master->prepare_for_output_pass) (cinfo);
+  cinfo->inputctl->consume_input = cinfo->coef->consume_data;
+  cinfo->inputctl->consume_input_build_huffman_index =
+      cinfo->coef->consume_data_build_huffman_index;
+  cinfo->entropy->index = index;
+  cinfo->input_iMCU_row = row_offset;
+  cinfo->output_iMCU_row = row_offset;
+  cinfo->coef->MCU_column_left_boundary = col_left_boundary;
+  cinfo->coef->MCU_column_right_boundary = col_right_boundary;
+  cinfo->coef->column_left_boundary =
+      col_left_boundary / index->MCU_sample_size;
+  cinfo->coef->column_right_boundary =
+      jdiv_round_up(col_right_boundary, index->MCU_sample_size);
 }
 
 /*
@@ -227,27 +270,30 @@
   // Calculates the boundary of iMCU
   int lines_per_iMCU_row = cinfo->max_v_samp_factor * DCTSIZE;
   int lines_per_iMCU_col = cinfo->max_h_samp_factor * DCTSIZE;
-  int col_left_boundary = ((start_x / lines_per_iMCU_col) / index->MCU_sample_size)
-      * index->MCU_sample_size;
+  int col_left_boundary = ((start_x / lines_per_iMCU_col)
+          / index->MCU_sample_size) * index->MCU_sample_size;
   int sampleSize = cinfo->image_width / cinfo->output_width;
+  int row_ctr = 0;
 
-  if (cinfo->output_scanline % (lines_per_iMCU_row / sampleSize) == 0) {
-    // Set the read head to the next iMCU row
-    cinfo->unread_marker = 0;
-    int iMCU_row_offset = cinfo->output_scanline / (lines_per_iMCU_row / sampleSize);
-    int offset_data_col_position = col_left_boundary / index->MCU_sample_size;
-    huffman_offset_data *offset_data =
-        &index->scan[0].offset[iMCU_row_offset][offset_data_col_position];
-
-    jpeg_configure_huffman_decoder(cinfo,
-            offset_data->bitstream_offset, offset_data->prev_dc);
+  if (cinfo->progressive_mode) {
+    (*cinfo->main->process_data) (cinfo, scanlines, &row_ctr, 1);
+  } else {
+    if (cinfo->output_scanline % (lines_per_iMCU_row / sampleSize) == 0) {
+      // Set the read head to the next iMCU row
+      int iMCU_row_offset = cinfo->output_scanline /
+            (lines_per_iMCU_row / sampleSize);
+      int offset_data_col_position = col_left_boundary / index->MCU_sample_size;
+      huffman_offset_data offset_data =
+          index->scan[0].offset[iMCU_row_offset][offset_data_col_position];
+      (*cinfo->entropy->configure_huffman_decoder) (cinfo, offset_data);
+    }
+    (*cinfo->main->process_data) (cinfo, scanlines, &row_ctr, 1);
   }
 
-  int row_ctr = jpeg_read_scanlines(cinfo, scanlines, 1); // Read one line
+  cinfo->output_scanline += row_ctr;
   return row_ctr;
 }
 
-
 /*
  * Alternate entry point to read raw data.
  * Processes exactly one iMCU row per call, unless suspended.
diff --git a/jdcoefct.c b/jdcoefct.c
index 2ac29f0..7a9f993 100644
--- a/jdcoefct.c
+++ b/jdcoefct.c
@@ -156,6 +156,14 @@
   jpeg_component_info *compptr;
   inverse_DCT_method_ptr inverse_DCT;
 
+#ifdef ANDROID_TILE_BASED_DECODE
+  if (cinfo->tile_decode) {
+    last_MCU_col =
+        (cinfo->coef->MCU_column_right_boundary -
+         cinfo->coef->MCU_column_left_boundary) - 1;
+  }
+#endif
+
   /* Loop to process as much as one whole iMCU row */
   for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
        yoffset++) {
@@ -230,9 +238,7 @@
   return JPEG_SUSPENDED;	/* Always indicate nothing was done */
 }
 
-
 #ifdef D_MULTISCAN_FILES_SUPPORTED
-
 /*
  * Consume input data and store it in the full-image coefficient buffer.
  * We read as much as one fully interleaved MCU row ("iMCU" row) per call,
@@ -256,17 +262,37 @@
     compptr = cinfo->cur_comp_info[ci];
     buffer[ci] = (*cinfo->mem->access_virt_barray)
       ((j_common_ptr) cinfo, coef->whole_image[compptr->component_index],
-       cinfo->input_iMCU_row * compptr->v_samp_factor,
+       cinfo->tile_decode ? 0 : cinfo->input_iMCU_row * compptr->v_samp_factor,
        (JDIMENSION) compptr->v_samp_factor, TRUE);
     /* Note: entropy decoder expects buffer to be zeroed,
      * but this is handled automatically by the memory manager
      * because we requested a pre-zeroed array.
      */
   }
+  unsigned int MCUs_per_row = cinfo->MCUs_per_row;
+#ifdef ANDROID_TILE_BASED_DECODE
+  if (cinfo->tile_decode) {
+    MCUs_per_row =
+        (cinfo->coef->column_right_boundary - cinfo->coef->column_left_boundary)
+        * cinfo->entropy->index->MCU_sample_size * cinfo->max_h_samp_factor;
+    MCUs_per_row = jmin(MCUs_per_row, cinfo->MCUs_per_row);
+  }
+#endif
+
   /* Loop to process one whole iMCU row */
   for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
        yoffset++) {
-    for (MCU_col_num = coef->MCU_ctr; MCU_col_num < cinfo->MCUs_per_row;
+#ifdef ANDROID_TILE_BASED_DECODE
+    if (cinfo->tile_decode) {
+      huffman_scan_header scan_header =
+            cinfo->entropy->index->scan[cinfo->input_scan_number];
+      int col_offset = cinfo->coef->column_left_boundary;
+      (*cinfo->entropy->configure_huffman_decoder) (cinfo,
+              scan_header.offset[cinfo->input_iMCU_row]
+              [col_offset + yoffset * scan_header.MCUs_per_row]);
+    }
+#endif
+    for (MCU_col_num = coef->MCU_ctr; MCU_col_num < MCUs_per_row;
 	 MCU_col_num++) {
       /* Construct list of pointers to DCT blocks belonging to this MCU */
       blkn = 0;			/* index of current DCT block within MCU */
@@ -277,6 +303,13 @@
           buffer_ptr = buffer[ci][yindex+yoffset] + start_col;
           for (xindex = 0; xindex < compptr->MCU_width; xindex++) {
             coef->MCU_buffer[blkn++] = buffer_ptr++;
+#ifdef ANDROID_TILE_BASED_DECODE
+            if (cinfo->tile_decode && cinfo->input_scan_number == 0) {
+              // We need to pre-zero the block ourselves.
+              jzero_far((void FAR *) coef->MCU_buffer[blkn-1],
+                        (size_t) (SIZEOF(JBLOCK)));
+            }
+#endif
           }
         }
       }
@@ -301,15 +334,45 @@
   return JPEG_SCAN_COMPLETED;
 }
 
-#define  rounded_division(A,B) ((A+B-1)/(B))
+/*
+ * Consume input data and store it in the coefficient buffer.
+ * Read one fully interleaved MCU row ("iMCU" row) per call.
+ */
+
+METHODDEF(int)
+consume_data_multi_scan (j_decompress_ptr cinfo)
+{
+  huffman_index *index = cinfo->entropy->index;
+  int i, retcode, ci;
+  int mcu = cinfo->input_iMCU_row;
+  jinit_phuff_decoder(cinfo);
+  for (i = 0; i < index->scan_count; i++) {
+    (*cinfo->inputctl->finish_input_pass) (cinfo);
+    jset_input_stream_position(cinfo, index->scan[i].bitstream_offset);
+    cinfo->output_iMCU_row = mcu;
+    cinfo->unread_marker = 0;
+    // Consume SOS and DHT headers
+    retcode = (*cinfo->inputctl->consume_markers) (cinfo, index, i);
+    cinfo->input_iMCU_row = mcu;
+    cinfo->input_scan_number = i;
+    cinfo->entropy->index = index;
+    // Consume scan block data
+    consume_data(cinfo);
+  }
+  cinfo->input_iMCU_row = mcu + 1;
+  cinfo->input_scan_number = 0;
+  cinfo->output_scan_number = 0;
+  return JPEG_ROW_COMPLETED;
+}
+
 /*
  * Same as consume_data, expect for saving the Huffman decode information
  * - bitstream offset and DC coefficient to index.
  */
 
 METHODDEF(int)
-consume_data_with_huffman_index (j_decompress_ptr cinfo, huffman_index *index,
-        int current_scan)
+consume_data_build_huffman_index_baseline (j_decompress_ptr cinfo,
+        huffman_index *index, int current_scan)
 {
   my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
   JDIMENSION MCU_col_num;	/* index of current MCU within row */
@@ -317,18 +380,17 @@
   JDIMENSION start_col;
   JBLOCKROW buffer_ptr;
 
-  huffman_scan_header current_header = index->scan[current_scan];
-  current_header.MCU_rows_per_iMCU_row = coef->MCU_rows_per_iMCU_row;
-  current_header.MCUs_per_row = cinfo->MCUs_per_row;
-  current_header.comps_in_scan = cinfo->comps_in_scan;
+  huffman_scan_header *scan_header = index->scan + current_scan;
+  scan_header->MCU_rows_per_iMCU_row = coef->MCU_rows_per_iMCU_row;
 
   size_t allocate_size = coef->MCU_rows_per_iMCU_row
-      * rounded_division(cinfo->MCUs_per_row, index->MCU_sample_size)
+      * jdiv_round_up(cinfo->MCUs_per_row, index->MCU_sample_size)
       * sizeof(huffman_offset_data);
-  current_header.offset[cinfo->input_iMCU_row] = (huffman_offset_data*)malloc(allocate_size);
+  scan_header->offset[cinfo->input_iMCU_row] =
+        (huffman_offset_data*)malloc(allocate_size);
   index->mem_used += allocate_size;
 
-  huffman_offset_data *offset_data = current_header.offset[cinfo->input_iMCU_row];
+  huffman_offset_data *offset_data = scan_header->offset[cinfo->input_iMCU_row];
 
   /* Loop to process one whole iMCU row */
   for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
@@ -337,8 +399,8 @@
 	 MCU_col_num++) {
       // Record huffman bit offset
       if (MCU_col_num % index->MCU_sample_size == 0) {
-        jpeg_get_huffman_decoder_configuration(cinfo,
-                &offset_data->bitstream_offset, offset_data->prev_dc);
+        (*cinfo->entropy->get_huffman_decoder_configuration)
+                (cinfo, offset_data);
         ++offset_data;
       }
 
@@ -363,6 +425,105 @@
   return JPEG_SCAN_COMPLETED;
 }
 
+/*
+ * Same as consume_data, except for saving the Huffman decode information
+ * - bitstream offset and DC coefficient to index.
+ */
+
+METHODDEF(int)
+consume_data_build_huffman_index_progressive (j_decompress_ptr cinfo,
+        huffman_index *index, int current_scan)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  int blkn, ci, xindex, yindex, yoffset;
+  JDIMENSION start_col;
+  JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];
+  JBLOCKROW buffer_ptr;
+  jpeg_component_info *compptr;
+
+  int factor = 4; // maximum factor is 4.
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++)
+    factor = jmin(factor, cinfo->cur_comp_info[ci]->h_samp_factor);
+
+  int sample_size = index->MCU_sample_size * factor;
+  huffman_scan_header *scan_header = index->scan + current_scan;
+  scan_header->MCU_rows_per_iMCU_row = coef->MCU_rows_per_iMCU_row;
+  scan_header->MCUs_per_row = jdiv_round_up(cinfo->MCUs_per_row, sample_size);
+  scan_header->comps_in_scan = cinfo->comps_in_scan;
+
+  size_t allocate_size = coef->MCU_rows_per_iMCU_row
+      * scan_header->MCUs_per_row * sizeof(huffman_offset_data);
+  scan_header->offset[cinfo->input_iMCU_row] =
+        (huffman_offset_data*)malloc(allocate_size);
+  index->mem_used += allocate_size;
+
+  huffman_offset_data *offset_data = scan_header->offset[cinfo->input_iMCU_row];
+
+  /* Align the virtual buffers for the components used in this scan. */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    buffer[ci] = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr) cinfo, coef->whole_image[compptr->component_index],
+       0, // Only need one row buffer
+       (JDIMENSION) compptr->v_samp_factor, TRUE);
+  }
+  /* Loop to process one whole iMCU row */
+  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
+       yoffset++) {
+    for (MCU_col_num = coef->MCU_ctr; MCU_col_num < cinfo->MCUs_per_row;
+	 MCU_col_num++) {
+      /* For each MCU, we loop through different color components.
+       * Then, for each color component we will get a list of pointers to DCT
+       * blocks in the virtual buffer.
+       */
+      blkn = 0; /* index of current DCT block within MCU */
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+        compptr = cinfo->cur_comp_info[ci];
+        start_col = MCU_col_num * compptr->MCU_width;
+        /* Get the list of pointers to DCT blocks in
+         * the virtual buffer in a color component of the MCU.
+         */
+        for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+          buffer_ptr = buffer[ci][yindex+yoffset] + start_col;
+          for (xindex = 0; xindex < compptr->MCU_width; xindex++) {
+            coef->MCU_buffer[blkn++] = buffer_ptr++;
+            if (cinfo->input_scan_number == 0) {
+              // We need to pre-zero the block ourselves.
+              jzero_far((void FAR *) coef->MCU_buffer[blkn-1],
+                        (size_t) (SIZEOF(JBLOCK)));
+            }
+          }
+        }
+      }
+      // Record huffman bit offset
+      if (MCU_col_num % sample_size == 0) {
+        (*cinfo->entropy->get_huffman_decoder_configuration)
+                (cinfo, offset_data);
+        ++offset_data;
+      }
+      /* Try to fetch the MCU. */
+      if (! (*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) {
+	/* Suspension forced; update state counters and exit */
+	coef->MCU_vert_offset = yoffset;
+	coef->MCU_ctr = MCU_col_num;
+	return JPEG_SUSPENDED;
+      }
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    coef->MCU_ctr = 0;
+  }
+  (*cinfo->entropy->get_huffman_decoder_configuration)
+        (cinfo, &scan_header->prev_MCU_offset);
+  /* Completed the iMCU row, advance counters for next one */
+  if (++(cinfo->input_iMCU_row) < cinfo->total_iMCU_rows) {
+    start_iMCU_row(cinfo);
+    return JPEG_ROW_COMPLETED;
+  }
+  /* Completed the scan */
+  (*cinfo->inputctl->finish_input_pass) (cinfo);
+  return JPEG_SCAN_COMPLETED;
+}
 
 /*
  * Decompress and return some data in the multi-pass case.
@@ -403,7 +564,7 @@
     /* Align the virtual buffer for this component. */
     buffer = (*cinfo->mem->access_virt_barray)
       ((j_common_ptr) cinfo, coef->whole_image[ci],
-       cinfo->output_iMCU_row * compptr->v_samp_factor,
+       cinfo->tile_decode ? 0 : cinfo->output_iMCU_row * compptr->v_samp_factor,
        (JDIMENSION) compptr->v_samp_factor, FALSE);
     /* Count non-dummy DCT block rows in this iMCU row. */
     if (cinfo->output_iMCU_row < last_iMCU_row)
@@ -744,10 +905,58 @@
   cinfo->coef = (struct jpeg_d_coef_controller *) coef;
   coef->pub.start_input_pass = start_input_pass;
   coef->pub.start_output_pass = start_output_pass;
+  coef->pub.column_left_boundary = 0;
+  coef->pub.column_right_boundary = 0;
 #ifdef BLOCK_SMOOTHING_SUPPORTED
   coef->coef_bits_latch = NULL;
 #endif
 
+#ifdef ANDROID_TILE_BASED_DECODE
+  if (cinfo->tile_decode) {
+    if (cinfo->progressive_mode) {
+      /* Allocate one iMCU row virtual array, coef->whole_image[ci],
+       * for each color component, padded to a multiple of h_samp_factor
+       * DCT blocks in the horizontal direction.
+       */
+      int ci, access_rows;
+      jpeg_component_info *compptr;
+
+      for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	   ci++, compptr++) {
+        access_rows = compptr->v_samp_factor;
+        coef->whole_image[ci] = (*cinfo->mem->request_virt_barray)
+	  ((j_common_ptr) cinfo, JPOOL_IMAGE, TRUE,
+	   (JDIMENSION) jround_up((long) compptr->width_in_blocks,
+				(long) compptr->h_samp_factor),
+	   (JDIMENSION) compptr->v_samp_factor, // one iMCU row
+	   (JDIMENSION) access_rows);
+      }
+      coef->pub.consume_data_build_huffman_index =
+            consume_data_build_huffman_index_progressive;
+      coef->pub.consume_data = consume_data_multi_scan;
+      coef->pub.coef_arrays = coef->whole_image; /* link to virtual arrays */
+      coef->pub.decompress_data = decompress_onepass;
+    } else {
+      /* We only need a single-MCU buffer. */
+      JBLOCKROW buffer;
+      int i;
+
+      buffer = (JBLOCKROW)
+      (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  D_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
+      for (i = 0; i < D_MAX_BLOCKS_IN_MCU; i++) {
+        coef->MCU_buffer[i] = buffer + i;
+      }
+      coef->pub.consume_data_build_huffman_index =
+            consume_data_build_huffman_index_baseline;
+      coef->pub.consume_data = dummy_consume_data;
+      coef->pub.coef_arrays = NULL; /* flag for no virtual arrays */
+      coef->pub.decompress_data = decompress_onepass;
+    }
+    return;
+  }
+#endif
+
   /* Create the coefficient buffer. */
   if (need_full_buffer) {
 #ifdef D_MULTISCAN_FILES_SUPPORTED
@@ -773,7 +982,6 @@
 				(long) compptr->v_samp_factor),
 	 (JDIMENSION) access_rows);
     }
-    coef->pub.consume_data_with_huffman_index = consume_data_with_huffman_index;
     coef->pub.consume_data = consume_data;
     coef->pub.decompress_data = decompress_data;
     coef->pub.coef_arrays = coef->whole_image; /* link to virtual arrays */
@@ -787,11 +995,10 @@
 
     buffer = (JBLOCKROW)
       (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  D_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
+		  D_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
     for (i = 0; i < D_MAX_BLOCKS_IN_MCU; i++) {
       coef->MCU_buffer[i] = buffer + i;
     }
-    coef->pub.consume_data_with_huffman_index = consume_data_with_huffman_index;
     coef->pub.consume_data = dummy_consume_data;
     coef->pub.decompress_data = decompress_onepass;
     coef->pub.coef_arrays = NULL; /* flag for no virtual arrays */
diff --git a/jdhuff.c b/jdhuff.c
index 5b87a4e..9164d17 100644
--- a/jdhuff.c
+++ b/jdhuff.c
@@ -19,6 +19,8 @@
 #include "jpeglib.h"
 #include "jdhuff.h"		/* Declarations shared with jdphuff.c */
 
+LOCAL(boolean) process_restart (j_decompress_ptr cinfo);
+
 
 /*
  * Expanded entropy decoder object for Huffman decoding.
@@ -293,10 +295,20 @@
 		      int nbits)
 /* Load up the bit buffer to a depth of at least nbits */
 {
+  j_decompress_ptr cinfo = state->cinfo;
+  if (cinfo->tile_decode &&
+      cinfo->restart_interval == 0 &&
+      cinfo->unread_marker >= 0xd0 &&
+      cinfo->unread_marker <= 0xd7 &&
+      nbits > bits_left
+      ) {
+      // Skip the restart marker.
+    cinfo->marker->next_restart_num = cinfo->unread_marker - 0xd0;
+    process_restart(cinfo);
+  }
   /* Copy heavily used state fields into locals (hopefully registers) */
   register const JOCTET * next_input_byte = state->next_input_byte;
   register size_t bytes_in_buffer = state->bytes_in_buffer;
-  j_decompress_ptr cinfo = state->cinfo;
 
   /* Attempt to load at least MIN_GET_BITS bits into get_buffer. */
   /* (It is assumed that no request will be for more than that many bits.) */
@@ -497,50 +509,60 @@
 }
 
 /*
- * Configure the Huffman decoder to decode the image
- * starting from (iMCU_row_offset, iMCU_col_offset).
+ * Configure the Huffman decoder reader position and bit buffer.
  */
-
 GLOBAL(void)
 jpeg_configure_huffman_decoder(j_decompress_ptr cinfo,
-              unsigned int bitstream_offset, short int *dc_info)
+        huffman_offset_data offset)
 {
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  unsigned int bitstream_offset = offset.bitstream_offset;
   int blkn, i;
 
-  BITREAD_STATE_VARS;
-  savable_state state;
+  cinfo->restart_interval = 0;
 
   unsigned int byte_offset = bitstream_offset >> LOG_TWO_BIT_BUF_SIZE;
   unsigned int bit_in_bit_buffer =
       bitstream_offset & ((1 << LOG_TWO_BIT_BUF_SIZE) - 1);
 
-  cinfo->src->next_input_byte = cinfo->src->start_input_byte + byte_offset;
-  cinfo->src->bytes_in_buffer = cinfo->src->total_byte - byte_offset;
+  jset_input_stream_position_bit(cinfo, byte_offset,
+          bit_in_bit_buffer, offset.get_buffer);
+}
 
-  entropy->bitstate.bits_left = 0;
+/*
+ * Save the current Huffman decoder position and the bit buffer
+ * into bitstream_offset and get_buffer, respectively.
+ */
+GLOBAL(void)
+jpeg_get_huffman_decoder_configuration(j_decompress_ptr cinfo,
+        huffman_offset_data *offset)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
 
-  /*
-   * When byte_offset points to the middle of a JPEG marker (2-bytes data
-   * starting with 0xFF), we need to shift the byte_offset backward so that
-   * CHECK_BIT_BUFFER can handle it properly.
-   */
-  for (i = 0; i < 5 || *(cinfo->src->next_input_byte - 1) == 0xFF; i++) {
-    if (cinfo->src->next_input_byte <= cinfo->src->start_input_byte)
-      break;
-    cinfo->src->next_input_byte--;
-    cinfo->src->bytes_in_buffer++;
+  if (cinfo->restart_interval) {
+    // We are at the end of a data segment
+    if (entropy->restarts_to_go == 0)
+      if (! process_restart(cinfo))
+	return;
   }
 
-  BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
-  CHECK_BIT_BUFFER(br_state, BIT_BUF_SIZE, return);
-  while (cinfo->src->total_byte - br_state.bytes_in_buffer < byte_offset) {
-    DROP_BITS(8);
-    CHECK_BIT_BUFFER(br_state, BIT_BUF_SIZE, return);
-  }
-  DROP_BITS(bits_left - bit_in_bit_buffer);
-  BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
+  offset->bitstream_offset =
+      (jget_input_stream_position(cinfo) << LOG_TWO_BIT_BUF_SIZE)
+      + entropy->bitstate.bits_left;
 
+  offset->get_buffer = entropy->bitstate.get_buffer;
+}
+
+/*
+ * Configure the Huffman decoder to decode the image
+ * starting from the bitstream position recorded in offset.
+ */
+METHODDEF(void)
+configure_huffman_decoder(j_decompress_ptr cinfo, huffman_offset_data offset)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  short int *dc_info = offset.prev_dc;
+  int i;
+  jpeg_configure_huffman_decoder(cinfo, offset);
   for (i = 0; i < cinfo->comps_in_scan; i++) {
     entropy->saved.last_dc_val[i] = dc_info[i];
   }
@@ -550,24 +572,16 @@
  * Save the current Huffman deocde position and the DC coefficients
  * for each component into bitstream_offset and dc_info[], respectively.
  */
-
-GLOBAL(void)
-jpeg_get_huffman_decoder_configuration(j_decompress_ptr cinfo,
-              unsigned int *bitstream_offset, short int *dc_info)
+METHODDEF(void)
+get_huffman_decoder_configuration(j_decompress_ptr cinfo,
+        huffman_offset_data *offset)
 {
   huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-
-  BITREAD_STATE_VARS;
-  savable_state state;
+  short int *dc_info = offset->prev_dc;
   int i;
-
-  BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
-  ASSIGN_STATE(state, entropy->saved);
-
-  *bitstream_offset = ((cinfo->src->total_byte - cinfo->src->bytes_in_buffer)
-          << LOG_TWO_BIT_BUF_SIZE) + bits_left;
+  jpeg_get_huffman_decoder_configuration(cinfo, offset);
   for (i = 0; i < cinfo->comps_in_scan; i++) {
-    dc_info[i] =  state.last_dc_val[i];
+    dc_info[i] = entropy->saved.last_dc_val[i];
   }
 }
 
@@ -796,26 +810,11 @@
   cinfo->entropy = (struct jpeg_entropy_decoder *) entropy;
   entropy->pub.start_pass = start_pass_huff_decoder;
   entropy->pub.decode_mcu = decode_mcu;
-
-  /* Mark tables unallocated */
-  for (i = 0; i < NUM_HUFF_TBLS; i++) {
-    entropy->dc_derived_tbls[i] = entropy->ac_derived_tbls[i] = NULL;
-  }
-}
-
-GLOBAL(void)
-jinit_huff_decoder_no_data (j_decompress_ptr cinfo)
-{
-  huff_entropy_ptr entropy;
-  int i;
-
-  entropy = (huff_entropy_ptr)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(huff_entropy_decoder));
-  cinfo->entropy = (struct jpeg_entropy_decoder *) entropy;
-  entropy->pub.start_pass = start_pass_huff_decoder;
-  entropy->pub.decode_mcu = decode_mcu;
   entropy->pub.decode_mcu_discard_coef = decode_mcu_discard_coef;
+  entropy->pub.configure_huffman_decoder = configure_huffman_decoder;
+  entropy->pub.get_huffman_decoder_configuration =
+        get_huffman_decoder_configuration;
+  entropy->pub.index = NULL;
 
   /* Mark tables unallocated */
   for (i = 0; i < NUM_HUFF_TBLS; i++) {
@@ -831,15 +830,16 @@
 {
   int i, s;
   index->scan_count = 1;
+  index->total_iMCU_rows = cinfo->total_iMCU_rows;
   index->scan = (huffman_scan_header*)malloc(index->scan_count
           * sizeof(huffman_scan_header));
-  index->total_iMCU_rows = cinfo->total_iMCU_rows;
   index->scan[0].offset = (huffman_offset_data**)malloc(cinfo->total_iMCU_rows
           * sizeof(huffman_offset_data*));
+  index->scan[0].prev_MCU_offset.bitstream_offset = 0;
   index->MCU_sample_size = DEFAULT_MCU_SAMPLE_SIZE;
 
   index->mem_used = sizeof(huffman_scan_header)
-      + cinfo->total_iMCU_rows*sizeof(huffman_offset_data*);
+      + cinfo->total_iMCU_rows * sizeof(huffman_offset_data*);
 }
 
 GLOBAL(void)
@@ -854,3 +854,42 @@
     }
     free(index->scan);
 }
+
+/*
+ * Set the reader byte position to offset
+ */
+GLOBAL(void)
+jset_input_stream_position(j_decompress_ptr cinfo, int offset)
+{
+  if (cinfo->src->seek_input_data) {
+    cinfo->src->seek_input_data(cinfo, offset);
+  } else {
+    cinfo->src->bytes_in_buffer = cinfo->src->current_offset - offset;
+    cinfo->src->next_input_byte = cinfo->src->start_input_byte + offset;
+  }
+}
+
+/*
+ * Set the reader byte position to offset and bit position to bit_left
+ * with bit buffer set to buf.
+ */
+GLOBAL(void)
+jset_input_stream_position_bit(j_decompress_ptr cinfo,
+        int byte_offset, int bit_left, INT32 buf)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+
+  entropy->bitstate.bits_left = bit_left;
+  entropy->bitstate.get_buffer = buf;
+
+  jset_input_stream_position(cinfo, byte_offset);
+}
+
+/*
+ * Get the current reader byte position.
+ */
+GLOBAL(int)
+jget_input_stream_position(j_decompress_ptr cinfo)
+{
+  return cinfo->src->current_offset - cinfo->src->bytes_in_buffer;
+}
diff --git a/jdinput.c b/jdinput.c
index 7f23f34..4261c1a 100644
--- a/jdinput.c
+++ b/jdinput.c
@@ -30,7 +30,7 @@
 /* Forward declarations */
 METHODDEF(int) consume_markers JPP((j_decompress_ptr cinfo));
 METHODDEF(int) consume_markers_with_huffman_index JPP((j_decompress_ptr cinfo,
-                    huffman_index *index));
+                    huffman_index *index, int current_scan));
 
 
 /*
@@ -116,6 +116,7 @@
     cinfo->inputctl->has_multiple_scans = TRUE;
   else
     cinfo->inputctl->has_multiple_scans = FALSE;
+  cinfo->original_image_width = cinfo->image_width;
 }
 
 LOCAL(void)
@@ -180,6 +181,15 @@
       tmp = (int) (compptr->width_in_blocks % compptr->MCU_width);
       if (tmp == 0) tmp = compptr->MCU_width;
       compptr->last_col_width = tmp;
+#ifdef ANDROID_TILE_BASED_DECODE
+      if (cinfo->tile_decode) {
+        tmp = (int) (jdiv_round_up(cinfo->image_width, 8)
+                % compptr->MCU_width);
+        if (tmp == 0) tmp = compptr->MCU_width;
+        compptr->last_col_width = tmp;
+      }
+#endif
+
       tmp = (int) (compptr->height_in_blocks % compptr->MCU_height);
       if (tmp == 0) tmp = compptr->MCU_height;
       compptr->last_row_height = tmp;
@@ -266,7 +276,8 @@
   (*cinfo->entropy->start_pass) (cinfo);
   (*cinfo->coef->start_input_pass) (cinfo);
   cinfo->inputctl->consume_input = cinfo->coef->consume_data;
-  cinfo->inputctl->consume_input_with_huffman_index = cinfo->coef->consume_data_with_huffman_index;
+  cinfo->inputctl->consume_input_build_huffman_index =
+        cinfo->coef->consume_data_build_huffman_index;
 }
 
 
@@ -280,12 +291,14 @@
 finish_input_pass (j_decompress_ptr cinfo)
 {
   cinfo->inputctl->consume_input = consume_markers;
-  cinfo->inputctl->consume_input_with_huffman_index = consume_markers_with_huffman_index;
+  cinfo->inputctl->consume_input_build_huffman_index =
+        consume_markers_with_huffman_index;
 }
 
 
 METHODDEF(int)
-consume_markers_with_huffman_index (j_decompress_ptr cinfo, huffman_index *index)
+consume_markers_with_huffman_index (j_decompress_ptr cinfo,
+        huffman_index *index, int current_scan)
 {
     return consume_markers(cinfo);
 }
@@ -356,7 +369,8 @@
   my_inputctl_ptr inputctl = (my_inputctl_ptr) cinfo->inputctl;
 
   inputctl->pub.consume_input = consume_markers;
-  inputctl->pub.consume_input_with_huffman_index = consume_markers_with_huffman_index;
+  inputctl->pub.consume_input_build_huffman_index =
+        consume_markers_with_huffman_index;
   inputctl->pub.has_multiple_scans = FALSE; /* "unknown" would be better */
   inputctl->pub.eoi_reached = FALSE;
   inputctl->inheaders = TRUE;
@@ -389,7 +403,9 @@
   inputctl->pub.start_input_pass = start_input_pass;
   inputctl->pub.finish_input_pass = finish_input_pass;
 
-  inputctl->pub.consume_input_with_huffman_index = consume_markers_with_huffman_index;
+  inputctl->pub.consume_markers = consume_markers_with_huffman_index;
+  inputctl->pub.consume_input_build_huffman_index =
+        consume_markers_with_huffman_index;
   /* Initialize state: can't use reset_input_controller since we don't
    * want to try to reset other modules yet.
    */
diff --git a/jdmarker.c b/jdmarker.c
index f4cca8c..7332940 100644
--- a/jdmarker.c
+++ b/jdmarker.c
@@ -964,6 +964,14 @@
 	  return JPEG_SUSPENDED;
       }
     }
+
+    /*
+     * Save the position of the first marker after SOF.
+     */
+    if (cinfo->marker->current_sos_marker_position == -1)
+      cinfo->marker->current_sos_marker_position =
+          jget_input_stream_position(cinfo) - 2;
+
     /* At this point cinfo->unread_marker contains the marker code and the
      * input point is just past the marker proper, but before any parameters.
      * A suspension will cause us to return with this state still true.
@@ -981,6 +989,7 @@
       break;
 
     case M_SOF2:		/* Progressive, Huffman */
+      cinfo->marker->current_sos_marker_position = -1;
       if (! get_sof(cinfo, TRUE, FALSE))
 	return JPEG_SUSPENDED;
       break;
@@ -1233,6 +1242,33 @@
   } /* end loop */
 }
 
+/*
+ * Get the position for all SOS markers in the image.
+ * For each scan, the saved SOS marker position is stored in the index, then
+ * the scan content (including RSTn markers) is skipped to the next marker.
+ */
+
+METHODDEF(void)
+get_sos_marker_position(j_decompress_ptr cinfo, huffman_index *index)
+{
+  int count = 0;
+  int retcode = JPEG_REACHED_SOS;
+
+  /* Stop once the input buffer is empty or read_markers() returns anything
+   * other than JPEG_REACHED_SOS (JPEG_REACHED_EOI or JPEG_SUSPENDED). */
+  while (cinfo->src->bytes_in_buffer > 0 && retcode == JPEG_REACHED_SOS) {
+    jpeg_configure_huffman_index_scan(cinfo, index, count++,
+            cinfo->marker->current_sos_marker_position);
+    /* Skip scan content to the next non-RST JPEG marker. */
+    while (next_marker(cinfo) &&
+            cinfo->unread_marker >= M_RST0 && cinfo->unread_marker <= M_RST7)
+      ;
+    /* Presumably -2 backs up over the marker bytes just read. */
+    cinfo->marker->current_sos_marker_position =
+      jget_input_stream_position(cinfo) - 2;
+    retcode = read_markers(cinfo);
+  }
+}
 
 /*
  * Reset marker processing state to begin a fresh datastream.
@@ -1273,6 +1309,7 @@
   marker->pub.reset_marker_reader = reset_marker_reader;
   marker->pub.read_markers = read_markers;
   marker->pub.read_restart_marker = read_restart_marker;
+  marker->pub.get_sos_marker_position = get_sos_marker_position;
   /* Initialize COM/APPn processing.
    * By default, we examine and then discard APP0 and APP14,
    * but simply discard COM and all other APPn.
diff --git a/jdphuff.c b/jdphuff.c
index 2267809..a58cdd2 100644
--- a/jdphuff.c
+++ b/jdphuff.c
@@ -83,7 +83,6 @@
 METHODDEF(boolean) decode_mcu_AC_refine JPP((j_decompress_ptr cinfo,
 					     JBLOCKROW *MCU_data));
 
-
 /*
  * Initialize for a Huffman-compressed scan.
  */
@@ -632,6 +631,36 @@
   return FALSE;
 }
 
+/*
+ * Configure the Huffman decoder from a saved offset (position, EOBRUN and
+ * DC values).  DC copies are capped at the capacity of offset.prev_dc.
+ */
+METHODDEF(void)
+configure_huffman_decoder(j_decompress_ptr cinfo, huffman_offset_data offset)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  int i, n_dc = (int) (sizeof(offset.prev_dc) / sizeof(offset.prev_dc[0]));
+  jpeg_configure_huffman_decoder(cinfo, offset);
+  entropy->saved.EOBRUN = offset.EOBRUN;
+  for (i = 0; i < cinfo->comps_in_scan && i < n_dc; i++)
+    entropy->saved.last_dc_val[i] = offset.prev_dc[i];
+}
+
+/*
+ * Save the current Huffman decode position, EOBRUN and the DC values of the
+ * scan's components into *offset (at most as many as prev_dc can hold).
+ */
+METHODDEF(void)
+get_huffman_decoder_configuration(j_decompress_ptr cinfo,
+        huffman_offset_data *offset)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  int i, n_dc = (int) (sizeof(offset->prev_dc) / sizeof(offset->prev_dc[0]));
+  jpeg_get_huffman_decoder_configuration(cinfo, offset);
+  offset->EOBRUN = entropy->saved.EOBRUN;
+  for (i = 0; i < cinfo->comps_in_scan && i < n_dc; i++) /* never overrun */
+    offset->prev_dc[i] = entropy->saved.last_dc_val[i];
+}
 
 /*
  * Module initialization routine for progressive Huffman entropy decoding.
@@ -649,6 +678,9 @@
 				SIZEOF(phuff_entropy_decoder));
   cinfo->entropy = (struct jpeg_entropy_decoder *) entropy;
   entropy->pub.start_pass = start_pass_phuff_decoder;
+  entropy->pub.configure_huffman_decoder = configure_huffman_decoder;
+  entropy->pub.get_huffman_decoder_configuration =
+        get_huffman_decoder_configuration;
 
   /* Mark derived tables unallocated */
   for (i = 0; i < NUM_HUFF_TBLS; i++) {
@@ -665,4 +697,22 @@
       *coef_bit_ptr++ = -1;
 }
 
+GLOBAL(void)
+jpeg_configure_huffman_index_scan(j_decompress_ptr cinfo,
+        huffman_index *index, int scan_no, int offset)
+{ /* Set up index->scan[scan_no]: allocate its row table, store offset. */
+  size_t row_bytes = cinfo->total_iMCU_rows * sizeof(huffman_offset_data*);
+  huffman_scan_header *scans = index->scan;
+  if (scan_no >= index->scan_count) { /* grow; old array kept on failure */
+    scans = realloc(scans, (scan_no + 1) * sizeof(huffman_scan_header));
+    if (scans == NULL) ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 0);
+    index->scan = scans;
+    index->mem_used += (scan_no - index->scan_count + 1)
+      * (sizeof(huffman_scan_header) + row_bytes);
+    index->scan_count = scan_no + 1;
+  }
+  scans[scan_no].offset = (huffman_offset_data**) malloc(row_bytes);
+  if (scans[scan_no].offset == NULL) ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 1);
+  scans[scan_no].bitstream_offset = offset;
+}
 #endif /* D_PROGRESSIVE_SUPPORTED */
diff --git a/jdtrans.c b/jdtrans.c
index 138a656..586909c 100644
--- a/jdtrans.c
+++ b/jdtrans.c
@@ -16,7 +16,7 @@
 
 
 /* Forward declarations */
-LOCAL(void) transdecode_master_selection JPP((j_decompress_ptr cinfo, boolean need_full_buffer));
+LOCAL(void) transdecode_master_selection JPP((j_decompress_ptr cinfo));
 
 
 /*
@@ -46,7 +46,7 @@
 {
   if (cinfo->global_state == DSTATE_READY) {
     /* First call: initialize active modules */
-    transdecode_master_selection(cinfo, TRUE);
+    transdecode_master_selection(cinfo);
     cinfo->global_state = DSTATE_RDCOEFS;
   }
   if (cinfo->global_state == DSTATE_RDCOEFS) {
@@ -87,12 +87,79 @@
   return NULL;			/* keep compiler happy */
 }
 
-GLOBAL(boolean)
-jpeg_build_huffman_index(j_decompress_ptr cinfo, huffman_index *index)
+LOCAL(boolean)
+jpeg_build_huffman_index_progressive(j_decompress_ptr cinfo,
+        huffman_index *index)
 {
   if (cinfo->global_state == DSTATE_READY) {
+    /* Progressive stream: each scan gets its own entry in the index. */
     /* First call: initialize active modules */
-    transdecode_master_selection(cinfo, FALSE);
+    transdecode_master_selection(cinfo);
+    cinfo->global_state = DSTATE_RDCOEFS;
+  }
+  if (cinfo->global_state == DSTATE_RDCOEFS) {
+    int mcu, i;
+    cinfo->marker->get_sos_marker_position(cinfo, index);
+
+    /* For every iMCU row, revisit each scan recorded in the index */
+    for (mcu = 0; mcu < cinfo->total_iMCU_rows; mcu++) {
+      int retcode = 0;
+      /* Call progress monitor hook if present */
+      if (cinfo->progress != NULL)
+        (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+      /* Re-initialize the progressive entropy decoder for this row */
+      jinit_phuff_decoder(cinfo);
+      for (i = 0; i < index->scan_count; i++) {
+        (*cinfo->inputctl->finish_input_pass) (cinfo);
+        jset_input_stream_position(cinfo, index->scan[i].bitstream_offset);
+        cinfo->unread_marker = 0;
+        retcode = (*cinfo->inputctl->consume_input_build_huffman_index)
+                    (cinfo, index, i);
+        if (retcode == JPEG_REACHED_EOI)
+          break;
+        cinfo->input_iMCU_row = mcu;
+        if (mcu != 0)
+          (*cinfo->entropy->configure_huffman_decoder)
+                (cinfo, index->scan[i].prev_MCU_offset);
+        cinfo->input_scan_number = i;
+        retcode = (*cinfo->inputctl->consume_input_build_huffman_index)
+                    (cinfo, index, i);
+      }
+      if (retcode == JPEG_SUSPENDED)
+        return FALSE;
+      if (retcode == JPEG_REACHED_EOI)
+        break;
+      /* Advance progress counter if appropriate */
+      if (cinfo->progress != NULL &&
+	  (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) {
+        if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) {
+	  /* startup underestimated number of scans; ratchet up one scan */
+          cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows;
+        }
+      }
+    }
+    cinfo->global_state = DSTATE_STOPPING;
+  }
+  /* At this point we should be in state DSTATE_STOPPING if being used
+   * standalone, or in state DSTATE_BUFIMAGE if being invoked to get access
+   * to the coefficients during a full buffered-image-mode decompression.
+   */
+  if ((cinfo->global_state == DSTATE_STOPPING ||
+       cinfo->global_state == DSTATE_BUFIMAGE) && cinfo->buffered_image) {
+    return TRUE;
+  }
+  /* Oops, improper usage */
+  ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  return FALSE;			/* keep compiler happy */
+}
+
+LOCAL(boolean)
+jpeg_build_huffman_index_baseline(j_decompress_ptr cinfo, huffman_index *index)
+{
+  if (cinfo->global_state == DSTATE_READY) {
+    printf("Baseline Mode\n");
+    /* First call: initialize active modules */
+    transdecode_master_selection(cinfo);
     cinfo->global_state = DSTATE_RDCOEFS;
   }
   if (cinfo->global_state == DSTATE_RDCOEFS) {
@@ -103,18 +170,12 @@
       if (cinfo->progress != NULL)
         (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
       /* Absorb some more input */
-      retcode = (*cinfo->inputctl->consume_input_with_huffman_index) (cinfo, index, 0);
+      retcode = (*cinfo->inputctl->consume_input_build_huffman_index)
+                    (cinfo, index, 0);
       if (retcode == JPEG_SUSPENDED)
         return FALSE;
       if (retcode == JPEG_REACHED_EOI)
         break;
-
-      /*
-       * TODO
-       * Baseline have one sacn only.
-       * If we reach scan complete the whole image is processed.
-       * Need changing for progressive mode.
-       */
       if (retcode == JPEG_SCAN_COMPLETED)
         break;
 
@@ -143,6 +204,15 @@
   return FALSE;			/* keep compiler happy */
 }
 
+GLOBAL(boolean)
+jpeg_build_huffman_index(j_decompress_ptr cinfo, huffman_index *index)
+{
+  /* Flag the decompressor for tile decoding, then pick the index builder. */
+  cinfo->tile_decode = TRUE;
+  return cinfo->progressive_mode
+      ? jpeg_build_huffman_index_progressive(cinfo, index)
+      : jpeg_build_huffman_index_baseline(cinfo, index);
+}
 
 /*
  * Master selection of decompression modules for transcoding.
@@ -150,7 +220,7 @@
  */
 
 LOCAL(void)
-transdecode_master_selection (j_decompress_ptr cinfo, boolean need_full_buffer)
+transdecode_master_selection (j_decompress_ptr cinfo)
 {
   /* This is effectively a buffered-image operation. */
   cinfo->buffered_image = TRUE;
@@ -166,16 +236,12 @@
       ERREXIT(cinfo, JERR_NOT_COMPILED);
 #endif
     } else {
-#ifdef ANDROID_TILE_BASED_DECODE
-      jinit_huff_decoder_no_data(cinfo);
-#else
       jinit_huff_decoder(cinfo);
-#endif
     }
   }
 
   /* Always get a full-image coefficient buffer. */
-  jinit_d_coef_controller(cinfo, need_full_buffer);
+  jinit_d_coef_controller(cinfo, TRUE);
 
   /* We can now tell the memory manager to allocate virtual arrays. */
   (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo);
diff --git a/jpegint.h b/jpegint.h
index 138d27b..fd6d8bb 100644
--- a/jpegint.h
+++ b/jpegint.h
@@ -145,7 +145,9 @@
 /* Input control module */
 struct jpeg_input_controller {
   JMETHOD(int, consume_input, (j_decompress_ptr cinfo));
-  JMETHOD(int, consume_input_with_huffman_index, (j_decompress_ptr cinfo,
+  JMETHOD(int, consume_input_build_huffman_index, (j_decompress_ptr cinfo,
+                    huffman_index *index, int scan_count));
+  JMETHOD(int, consume_markers, (j_decompress_ptr cinfo,
                     huffman_index *index, int scan_count));
   JMETHOD(void, reset_input_controller, (j_decompress_ptr cinfo));
   JMETHOD(void, start_input_pass, (j_decompress_ptr cinfo));
@@ -168,13 +170,17 @@
 struct jpeg_d_coef_controller {
   JMETHOD(void, start_input_pass, (j_decompress_ptr cinfo));
   JMETHOD(int, consume_data, (j_decompress_ptr cinfo));
-  JMETHOD(int, consume_data_with_huffman_index, (j_decompress_ptr cinfo,
+  JMETHOD(int, consume_data_build_huffman_index, (j_decompress_ptr cinfo,
                     huffman_index* index, int scan_count));
   JMETHOD(void, start_output_pass, (j_decompress_ptr cinfo));
   JMETHOD(int, decompress_data, (j_decompress_ptr cinfo,
 				 JSAMPIMAGE output_buf));
   /* Pointer to array of coefficient virtual arrays, or NULL if none */
   jvirt_barray_ptr *coef_arrays;
+  int column_left_boundary;	/* NOTE(review): presumably the column */
+  int column_right_boundary;	/* range used by tile decoding; the    */
+  int MCU_column_left_boundary;	/* units are not visible here - confirm */
+  int MCU_column_right_boundary;
 };
 
 /* Decompression postprocessing (color quantization buffer control) */
@@ -197,6 +203,8 @@
    * JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI.
    */
   JMETHOD(int, read_markers, (j_decompress_ptr cinfo));
+  JMETHOD(void, get_sos_marker_position, (j_decompress_ptr cinfo,
+                    huffman_index *index));
   /* Read a restart marker --- exported for use by entropy decoder only */
   jpeg_marker_parser_method read_restart_marker;
 
@@ -206,6 +214,7 @@
   boolean saw_SOI;		/* found SOI? */
   boolean saw_SOF;		/* found SOF? */
   int next_restart_num;		/* next restart number expected (0-7) */
+  int current_sos_marker_position;
   unsigned int discarded_bytes;	/* # of bytes skipped looking for a marker */
 };
 
@@ -215,10 +224,16 @@
   JMETHOD(boolean, decode_mcu, (j_decompress_ptr cinfo,
 				JBLOCKROW *MCU_data));
   JMETHOD(boolean, decode_mcu_discard_coef, (j_decompress_ptr cinfo));
+  JMETHOD(void, configure_huffman_decoder, (j_decompress_ptr cinfo,
+                    huffman_offset_data offset));
+  JMETHOD(void, get_huffman_decoder_configuration, (j_decompress_ptr cinfo,
+                    huffman_offset_data *offset));
 
   /* This is here to share code between baseline and progressive decoders; */
   /* other modules probably should not use it */
   boolean insufficient_data;	/* set TRUE after emitting warning */
+
+  huffman_index *index;
 };
 
 /* Inverse DCT (also performs dequantization) */
@@ -377,12 +392,20 @@
 /* Utility routines in jutils.c */
 EXTERN(long) jdiv_round_up JPP((long a, long b));
 EXTERN(long) jround_up JPP((long a, long b));
+EXTERN(long) jmin JPP((long a, long b));
 EXTERN(void) jcopy_sample_rows JPP((JSAMPARRAY input_array, int source_row,
 				    JSAMPARRAY output_array, int dest_row,
 				    int num_rows, JDIMENSION num_cols));
 EXTERN(void) jcopy_block_row JPP((JBLOCKROW input_row, JBLOCKROW output_row,
 				  JDIMENSION num_blocks));
 EXTERN(void) jzero_far JPP((void FAR * target, size_t bytestozero));
+
+EXTERN(void) jset_input_stream_position JPP((j_decompress_ptr cinfo,
+                    int offset));
+EXTERN(void) jset_input_stream_position_bit JPP((j_decompress_ptr cinfo,
+                    int byte_offset, int bit_left, INT32 buf));
+
+EXTERN(int) jget_input_stream_position JPP((j_decompress_ptr cinfo));
 /* Constant tables in jutils.c */
 #if 0				/* This table is not actually needed in v6a */
 extern const int jpeg_zigzag_order[]; /* natural coef order to zigzag order */
diff --git a/jpeglib.h b/jpeglib.h
index 430e2c0..1dccc87 100644
--- a/jpeglib.h
+++ b/jpeglib.h
@@ -421,7 +421,10 @@
   /* Basic description of image --- filled in by jpeg_read_header(). */
   /* Application may inspect these values to decide how to process image. */
 
-  JDIMENSION image_width;	/* nominal image width (from SOF marker) */
+  JDIMENSION original_image_width;	/* nominal image width (from SOF marker) */
+
+  JDIMENSION image_width;	/* nominal image width (from SOF marker)
+                               may be changed by tile decode */
   JDIMENSION image_height;	/* nominal image height */
   int num_components;		/* # of color components in JPEG image */
   J_COLOR_SPACE jpeg_color_space; /* colorspace of JPEG image */
@@ -539,6 +542,7 @@
   jpeg_component_info * comp_info;
   /* comp_info[i] describes component that appears i'th in SOF */
 
+  boolean tile_decode;         /* TRUE if using tile based decoding */
   boolean progressive_mode;	/* TRUE if SOFn specifies progressive mode */
   boolean arith_code;		/* TRUE=arithmetic coding, FALSE=Huffman */
 
@@ -639,6 +643,12 @@
   //  \------ 27 -------/ \---- 5 ----/
   unsigned int bitstream_offset;
   short prev_dc[3];
+
+  // remaining EOBs in EOBRUN
+  unsigned short EOBRUN;
+
+  // save the decoder current bit buffer, entropy->bitstate.get_buffer.
+  INT32 get_buffer;
 } huffman_offset_data;
 
 typedef struct {
@@ -653,6 +663,9 @@
   int MCUs_per_row;
   int MCU_rows_per_iMCU_row;
 
+  // The last MCU position and its dc value in this scan
+  huffman_offset_data prev_MCU_offset;
+
   huffman_offset_data **offset;
 } huffman_scan_header;
 
@@ -674,7 +687,6 @@
   huffman_scan_header *scan;
 } huffman_index;
 
-
 /* "Object" declarations for JPEG modules that may be supplied or called
  * directly by the surrounding application.
  * As with all objects in the JPEG library, these structs only define the
@@ -771,13 +783,14 @@
   const JOCTET * next_input_byte; /* => next byte to read from buffer */
   const JOCTET * start_input_byte; /* => first byte to read from input */
   size_t bytes_in_buffer;	/* # of bytes remaining in buffer */
-  size_t total_byte; /* # of bytes in input */
+  size_t current_offset; /* current read offset in the input */
 
   JMETHOD(void, init_source, (j_decompress_ptr cinfo));
   JMETHOD(boolean, fill_input_buffer, (j_decompress_ptr cinfo));
   JMETHOD(void, skip_input_data, (j_decompress_ptr cinfo, long num_bytes));
   JMETHOD(boolean, resync_to_restart, (j_decompress_ptr cinfo, int desired));
   JMETHOD(void, term_source, (j_decompress_ptr cinfo));
+  JMETHOD(boolean, seek_input_data, (j_decompress_ptr cinfo, long byte_offset));
 };
 
 
@@ -1020,6 +1033,7 @@
 
 /* Main entry points for decompression */
 EXTERN(boolean) jpeg_start_decompress JPP((j_decompress_ptr cinfo));
+EXTERN(boolean) jpeg_start_tile_decompress JPP((j_decompress_ptr cinfo));
 EXTERN(JDIMENSION) jpeg_read_scanlines JPP((j_decompress_ptr cinfo,
 					    JSAMPARRAY scanlines,
 					    JDIMENSION max_lines));
@@ -1073,7 +1087,8 @@
 
 /* Read or write raw DCT coefficients --- useful for lossless transcoding. */
 EXTERN(jvirt_barray_ptr *) jpeg_read_coefficients JPP((j_decompress_ptr cinfo));
-EXTERN(boolean) jpeg_build_huffman_index JPP((j_decompress_ptr cinfo, huffman_index *index));
+EXTERN(boolean) jpeg_build_huffman_index
+    JPP((j_decompress_ptr cinfo, huffman_index *index));
 EXTERN(void) jpeg_write_coefficients JPP((j_compress_ptr cinfo,
 					  jvirt_barray_ptr * coef_arrays));
 EXTERN(void) jpeg_copy_critical_parameters JPP((j_decompress_ptr srcinfo,
@@ -1099,11 +1114,13 @@
 					    int desired));
 
 EXTERN(void) jpeg_configure_huffman_decoder(j_decompress_ptr cinfo,
-                        unsigned int bitstream_offset, short int *dc_info);
+                        huffman_offset_data offset);
 EXTERN(void) jpeg_get_huffman_decoder_configuration(j_decompress_ptr cinfo,
-                        unsigned int *bitstream_offset, short int *dc_info);
+                        huffman_offset_data *offset);
 EXTERN(void) jpeg_create_huffman_index(j_decompress_ptr cinfo,
                         huffman_index *index);
+EXTERN(void) jpeg_configure_huffman_index_scan(j_decompress_ptr cinfo,
+                        huffman_index *index, int scan_no, int offset);
 EXTERN(void) jpeg_destroy_huffman_index(huffman_index *index);
 
 
diff --git a/jutils.c b/jutils.c
index d18a955..616ad05 100644
--- a/jutils.c
+++ b/jutils.c
@@ -86,6 +86,12 @@
   return a - (a % b);
 }
 
+GLOBAL(long)
+jmin (long a, long b)
+/* Return the smaller of a and b */
+{
+  return (b <= a) ? b : a;
+}
 
 /* On normal machines we can apply MEMCOPY() and MEMZERO() to sample arrays
  * and coefficient-block arrays.  This won't work on 80x86 because the arrays