Support tile-based JPEG decoding. This change only handles the baseline case.

Tile-based decoding is enabled by default.
To disable it, comment out the ANDROID_TILE_BASED_DECODE flag in Android.mk.

Change-Id: Ib9f7839095b0ca55260c28d3c499c871e81332ea
diff --git a/Android.mk b/Android.mk
index 145259d..20f338a 100644
--- a/Android.mk
+++ b/Android.mk
@@ -32,6 +32,9 @@
 LOCAL_CFLAGS += -O3 -fstrict-aliasing -fprefetch-loop-arrays
 #LOCAL_CFLAGS += -march=armv6j
 
+# enable tile based decode
+LOCAL_CFLAGS += -DANDROID_TILE_BASED_DECODE
+
 LOCAL_MODULE:= libjpeg
 
 include $(BUILD_SHARED_LIBRARY)
diff --git a/jdapimin.c b/jdapimin.c
index cadb59f..dca964c 100644
--- a/jdapimin.c
+++ b/jdapimin.c
@@ -371,6 +371,9 @@
   if ((cinfo->global_state == DSTATE_SCANNING ||
        cinfo->global_state == DSTATE_RAW_OK) && ! cinfo->buffered_image) {
     /* Terminate final pass of non-buffered mode */
+#ifdef ANDROID_TILE_BASED_DECODE
+    cinfo->output_scanline = cinfo->output_height;
+#endif
     if (cinfo->output_scanline < cinfo->output_height)
       ERREXIT(cinfo, JERR_TOO_LITTLE_DATA);
     (*cinfo->master->finish_output_pass) (cinfo);
@@ -383,10 +386,12 @@
     ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
   }
   /* Read until EOI */
+#ifndef ANDROID_TILE_BASED_DECODE
   while (! cinfo->inputctl->eoi_reached) {
     if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED)
       return FALSE;		/* Suspend, come back later */
   }
+#endif
   /* Do final cleanup */
   (*cinfo->src->term_source) (cinfo);
   /* We can use jpeg_abort to release memory and reset global_state */
diff --git a/jdapistd.c b/jdapistd.c
index c8e3fa0..e3c84dd 100644
--- a/jdapistd.c
+++ b/jdapistd.c
@@ -174,6 +174,78 @@
   cinfo->output_scanline += row_ctr;
   return row_ctr;
 }
+/*
+ * Initialize the JPEG decoder to decompress a rectangle of size (width, height)
+ * whose upper-left corner is located at (start_x, start_y).
+ * start_x and start_y are aligned down to iMCU boundaries (start_x to a
+ * multiple of MCU_sample_size iMCUs) and returned in (start_x, start_y).
+ */
+
+GLOBAL(void)
+jpeg_init_read_tile_scanline(j_decompress_ptr cinfo, huffman_index *index,
+		     int *start_x, int *start_y, int *width, int *height)
+{
+  // Tile boundaries in iMCU units; the left edge snaps down to an indexed MCU
+  int lines_per_iMCU_row = cinfo->max_v_samp_factor * DCTSIZE;
+  int lines_per_iMCU_col = cinfo->max_h_samp_factor * DCTSIZE;
+  int row_offset = *start_y / lines_per_iMCU_row;
+  int col_left_boundary = ((*start_x / lines_per_iMCU_col) / index->MCU_sample_size)
+      * index->MCU_sample_size;
+  int col_right_boundary = (*start_x + *width + lines_per_iMCU_col - 1) / lines_per_iMCU_col;
+  // Grow height by the rows gained from aligning start_y, then report the aligned origin
+  *height = (*start_y - row_offset * lines_per_iMCU_row) + *height;
+  *start_x = col_left_boundary * lines_per_iMCU_col;
+  *start_y = row_offset * lines_per_iMCU_row;
+  cinfo->image_width = (col_right_boundary - col_left_boundary) * lines_per_iMCU_col;
+  cinfo->input_iMCU_row = row_offset;
+  cinfo->output_iMCU_row = row_offset;
+
+  // Set up the decoder modules for the tile-sized image_width
+  jinit_color_deconverter(cinfo);
+  jpeg_calc_output_dimensions(cinfo);
+  jinit_upsampler(cinfo);
+  jpeg_decompress_per_scan_setup(cinfo);
+  cinfo->MCUs_per_row = col_right_boundary - col_left_boundary;
+  // Convert the tile geometry to output (scaled) units
+  int sampleSize = cinfo->image_width / cinfo->output_width;
+  *height /= sampleSize;
+  *width = cinfo->output_width;
+  cinfo->output_scanline = lines_per_iMCU_row * row_offset / sampleSize;
+  (*cinfo->master->prepare_for_output_pass) (cinfo);
+}
+
+/*
+ * Read one scanline of the tile at the current output position.  start_y,
+ * width and height are accepted but not used by this function.
+ * Return the number of lines actually read.
+ */
+
+GLOBAL(JDIMENSION)
+jpeg_read_tile_scanline (j_decompress_ptr cinfo, huffman_index *index,
+        JSAMPARRAY scanlines, int start_x, int start_y, int width, int height)
+{
+  // Left tile edge in iMCUs, snapped down to a multiple of MCU_sample_size
+  int lines_per_iMCU_row = cinfo->max_v_samp_factor * DCTSIZE;
+  int lines_per_iMCU_col = cinfo->max_h_samp_factor * DCTSIZE;
+  int col_left_boundary = ((start_x / lines_per_iMCU_col) / index->MCU_sample_size)
+      * index->MCU_sample_size;
+  int sampleSize = cinfo->image_width / cinfo->output_width;
+
+  if (cinfo->output_scanline % (lines_per_iMCU_row / sampleSize) == 0) {
+    // First output line of an iMCU row: load the indexed decoder state for it
+    cinfo->unread_marker = 0;
+    int iMCU_row_offset = cinfo->output_scanline / (lines_per_iMCU_row / sampleSize);
+    int offset_data_col_position = col_left_boundary / index->MCU_sample_size;
+    huffman_offset_data *offset_data =
+        &index->scan[0].offset[iMCU_row_offset][offset_data_col_position];
+
+    jpeg_configure_huffman_decoder(cinfo,
+            offset_data->bitstream_offset, offset_data->prev_dc);
+  }
+
+  int row_ctr = jpeg_read_scanlines(cinfo, scanlines, 1); // Read one line
+  return row_ctr;
+}
 
 
 /*
diff --git a/jdcoefct.c b/jdcoefct.c
index 4938d20..2ac29f0 100644
--- a/jdcoefct.c
+++ b/jdcoefct.c
@@ -263,7 +263,6 @@
      * because we requested a pre-zeroed array.
      */
   }
-
   /* Loop to process one whole iMCU row */
   for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
        yoffset++) {
@@ -272,14 +271,14 @@
       /* Construct list of pointers to DCT blocks belonging to this MCU */
       blkn = 0;			/* index of current DCT block within MCU */
       for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-	compptr = cinfo->cur_comp_info[ci];
-	start_col = MCU_col_num * compptr->MCU_width;
-	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
-	  buffer_ptr = buffer[ci][yindex+yoffset] + start_col;
-	  for (xindex = 0; xindex < compptr->MCU_width; xindex++) {
-	    coef->MCU_buffer[blkn++] = buffer_ptr++;
-	  }
-	}
+        compptr = cinfo->cur_comp_info[ci];
+        start_col = MCU_col_num * compptr->MCU_width;
+        for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+          buffer_ptr = buffer[ci][yindex+yoffset] + start_col;
+          for (xindex = 0; xindex < compptr->MCU_width; xindex++) {
+            coef->MCU_buffer[blkn++] = buffer_ptr++;
+          }
+        }
       }
       /* Try to fetch the MCU. */
       if (! (*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) {
@@ -302,6 +301,68 @@
   return JPEG_SCAN_COMPLETED;
 }
 
+#define  rounded_division(A,B) (((A)+(B)-1)/(B))
+/*
+ * Same as consume_data, except that the coefficients are discarded and the
+ * Huffman decoder state (bitstream offset, previous DC values) is saved into
+ * index (updated in place) once every index->MCU_sample_size MCUs.
+ * NOTE(review): a resume after JPEG_SUSPENDED allocates the row's table again.
+ */
+
+METHODDEF(int)
+consume_data_with_huffman_index (j_decompress_ptr cinfo, huffman_index *index,
+        int current_scan)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  int yoffset;
+  huffman_scan_header *current_header = &index->scan[current_scan];
+  current_header->MCU_rows_per_iMCU_row = coef->MCU_rows_per_iMCU_row;
+  current_header->MCUs_per_row = cinfo->MCUs_per_row;
+  current_header->comps_in_scan = cinfo->comps_in_scan;
+
+  size_t allocate_size = coef->MCU_rows_per_iMCU_row
+      * rounded_division(cinfo->MCUs_per_row, index->MCU_sample_size)
+      * sizeof(huffman_offset_data);
+  huffman_offset_data *offset_data = (huffman_offset_data*)malloc(allocate_size);
+  if (offset_data == NULL)
+    ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 0);
+  current_header->offset[cinfo->input_iMCU_row] = offset_data;
+  index->mem_used += allocate_size;
+
+  /* Loop to process one whole iMCU row */
+  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
+       yoffset++) {
+    for (MCU_col_num = coef->MCU_ctr; MCU_col_num < cinfo->MCUs_per_row;
+	 MCU_col_num++) {
+      // Record huffman bit offset
+      if (MCU_col_num % index->MCU_sample_size == 0) {
+        jpeg_get_huffman_decoder_configuration(cinfo,
+                &offset_data->bitstream_offset, offset_data->prev_dc);
+        ++offset_data;
+      }
+
+      /* Try to fetch the MCU. */
+      if (! (*cinfo->entropy->decode_mcu_discard_coef) (cinfo)) {
+        /* Suspension forced; update state counters and exit */
+        coef->MCU_vert_offset = yoffset;
+        coef->MCU_ctr = MCU_col_num;
+        return JPEG_SUSPENDED;
+      }
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    coef->MCU_ctr = 0;
+  }
+  /* Completed the iMCU row, advance counters for next one */
+  if (++(cinfo->input_iMCU_row) < cinfo->total_iMCU_rows) {
+    start_iMCU_row(cinfo);
+    return JPEG_ROW_COMPLETED;
+  }
+  /* Completed the scan */
+  (*cinfo->inputctl->finish_input_pass) (cinfo);
+  return JPEG_SCAN_COMPLETED;
+}
+
 
 /*
  * Decompress and return some data in the multi-pass case.
@@ -712,6 +773,7 @@
 				(long) compptr->v_samp_factor),
 	 (JDIMENSION) access_rows);
     }
+    coef->pub.consume_data_with_huffman_index = consume_data_with_huffman_index;
     coef->pub.consume_data = consume_data;
     coef->pub.decompress_data = decompress_data;
     coef->pub.coef_arrays = coef->whole_image; /* link to virtual arrays */
@@ -729,6 +791,7 @@
     for (i = 0; i < D_MAX_BLOCKS_IN_MCU; i++) {
       coef->MCU_buffer[i] = buffer + i;
     }
+    coef->pub.consume_data_with_huffman_index = consume_data_with_huffman_index;
     coef->pub.consume_data = dummy_consume_data;
     coef->pub.decompress_data = decompress_onepass;
     coef->pub.coef_arrays = NULL; /* flag for no virtual arrays */
diff --git a/jdhuff.c b/jdhuff.c
index b5ba39f..5b87a4e 100644
--- a/jdhuff.c
+++ b/jdhuff.c
@@ -77,7 +77,6 @@
 
 typedef huff_entropy_decoder * huff_entropy_ptr;
 
-
 /*
  * Initialize for a Huffman-compressed scan.
  */
@@ -497,6 +496,80 @@
   return TRUE;
 }
 
+/*
+ * Configure the Huffman decoder to resume decoding at a saved position.
+ * bitstream_offset packs (byte offset << LOG_TWO_BIT_BUF_SIZE) + bits left in
+ * the bit buffer; dc_info[] holds the last DC value of each scan component.
+ */
+
+GLOBAL(void)
+jpeg_configure_huffman_decoder(j_decompress_ptr cinfo,
+              unsigned int bitstream_offset, short int *dc_info)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  int i;
+
+  BITREAD_STATE_VARS;
+
+  unsigned int byte_offset = bitstream_offset >> LOG_TWO_BIT_BUF_SIZE;
+  unsigned int bit_in_bit_buffer =
+      bitstream_offset & ((1 << LOG_TWO_BIT_BUF_SIZE) - 1);
+
+  cinfo->src->next_input_byte = cinfo->src->start_input_byte + byte_offset;
+  cinfo->src->bytes_in_buffer = cinfo->src->total_byte - byte_offset;
+
+  entropy->bitstate.bits_left = 0;
+
+  /*
+   * When byte_offset points to the middle of a JPEG marker (2-bytes data
+   * starting with 0xFF), we need to shift the byte_offset backward so that
+   * CHECK_BIT_BUFFER can handle it properly.
+   * The start-of-input test runs first so start_input_byte[-1] is never read.
+   */
+  for (i = 0; cinfo->src->next_input_byte > cinfo->src->start_input_byte
+       && (i < 5 || *(cinfo->src->next_input_byte - 1) == 0xFF); i++) {
+    cinfo->src->next_input_byte--;
+    cinfo->src->bytes_in_buffer++;
+  }
+
+  BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
+  CHECK_BIT_BUFFER(br_state, BIT_BUF_SIZE, return);
+  while (cinfo->src->total_byte - br_state.bytes_in_buffer < byte_offset) {
+    DROP_BITS(8);
+    CHECK_BIT_BUFFER(br_state, BIT_BUF_SIZE, return);
+  }
+  DROP_BITS(bits_left - bit_in_bit_buffer);
+  BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
+
+  for (i = 0; i < cinfo->comps_in_scan; i++) {
+    entropy->saved.last_dc_val[i] = dc_info[i];
+  }
+}
+
+/*
+ * Save the current Huffman decode position into bitstream_offset and the last
+ * DC value of each of the comps_in_scan components into dc_info[].
+ */
+
+GLOBAL(void)
+jpeg_get_huffman_decoder_configuration(j_decompress_ptr cinfo,
+              unsigned int *bitstream_offset, short int *dc_info)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+
+  BITREAD_STATE_VARS;
+  savable_state state;
+  int i;
+
+  BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
+  ASSIGN_STATE(state, entropy->saved);
+  /* Bytes consumed so far go in the high bits, bits_left in the low bits */
+  *bitstream_offset = ((cinfo->src->total_byte - cinfo->src->bytes_in_buffer)
+          << LOG_TWO_BIT_BUF_SIZE) + bits_left;
+  for (i = 0; i < cinfo->comps_in_scan; i++) {
+    dc_info[i] =  state.last_dc_val[i];
+  }
+}
 
 /*
  * Decode and return one MCU's worth of Huffman-compressed coefficients.
@@ -532,7 +605,6 @@
    * This way, we return uniform gray for the remainder of the segment.
    */
   if (! entropy->pub.insufficient_data) {
-
     /* Load up working state */
     BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
     ASSIGN_STATE(state, entropy->saved);
@@ -626,6 +698,87 @@
   return TRUE;
 }
 
+/*
+ * Decode one MCU's worth of Huffman-compressed coefficients.
+ * The purpose of this method is to calculate the
+ * data length of one MCU in Huffman-coded format.
+ * Therefore, all coefficients are discarded.
+ */
+
+METHODDEF(boolean)
+decode_mcu_discard_coef (j_decompress_ptr cinfo)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  int blkn;
+  BITREAD_STATE_VARS;
+  savable_state state;
+
+  /* Process restart marker if needed; may have to suspend */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (! process_restart(cinfo))
+	return FALSE;
+  }
+
+  if (! entropy->pub.insufficient_data) {
+
+    /* Load up working state */
+    BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
+    ASSIGN_STATE(state, entropy->saved);
+
+    /* Outer loop handles each block in the MCU */
+
+    for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+      d_derived_tbl * dctbl = entropy->dc_cur_tbls[blkn];
+      d_derived_tbl * actbl = entropy->ac_cur_tbls[blkn];
+      register int s, k, r;
+
+      /* Decode a single block's worth of coefficients */
+
+      /* Section F.2.2.1: decode the DC coefficient difference */
+      HUFF_DECODE(s, br_state, dctbl, return FALSE, label1);
+      if (s) {
+	CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	r = GET_BITS(s);
+	s = HUFF_EXTEND(r, s);
+      }
+
+      /* Nothing is stored in a block; only last_dc_val is kept up to date */
+      if (entropy->dc_needed[blkn]) {
+	/* Convert DC difference to actual value, update last_dc_val */
+	int ci = cinfo->MCU_membership[blkn];
+	s += state.last_dc_val[ci];
+	state.last_dc_val[ci] = s;
+      }
+      for (k = 1; k < DCTSIZE2; k++) {
+        HUFF_DECODE(s, br_state, actbl, return FALSE, label3);
+
+        r = s >> 4;
+        s &= 15;
+
+        if (s) {
+          k += r;
+          CHECK_BIT_BUFFER(br_state, s, return FALSE);
+          DROP_BITS(s);	/* skip the s value bits without decoding them */
+        } else {
+          if (r != 15)
+            break;
+          k += 15;
+        }
+      }
+    }
+
+    /* Completed MCU, so update state */
+    BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
+    ASSIGN_STATE(entropy->saved, state);
+  }
+
+  /* Account for restart interval (no-op if not using restarts) */
+  entropy->restarts_to_go--;
+
+  return TRUE;
+}
+
 
 /*
  * Module initialization routine for Huffman entropy decoding.
@@ -649,3 +802,55 @@
     entropy->dc_derived_tbls[i] = entropy->ac_derived_tbls[i] = NULL;
   }
 }
+
+/* Huffman decoder init that also installs decode_mcu_discard_coef. */
+GLOBAL(void)
+jinit_huff_decoder_no_data (j_decompress_ptr cinfo)
+{
+  huff_entropy_ptr entropy;
+  int i;
+
+  entropy = (huff_entropy_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(huff_entropy_decoder));
+  cinfo->entropy = (struct jpeg_entropy_decoder *) entropy;
+  entropy->pub.start_pass = start_pass_huff_decoder;
+  entropy->pub.decode_mcu = decode_mcu;
+  entropy->pub.decode_mcu_discard_coef = decode_mcu_discard_coef;
+  /* Mark tables unallocated */
+  for (i = 0; i < NUM_HUFF_TBLS; i++) {
+    entropy->dc_derived_tbls[i] = entropy->ac_derived_tbls[i] = NULL;
+  }
+}
+
+/* Call after jpeg_read_header.  Zero-filled so unbuilt rows free safely. */
+GLOBAL(void)
+jpeg_create_huffman_index(j_decompress_ptr cinfo, huffman_index *index)
+{
+  index->scan_count = 1;
+  index->scan = (huffman_scan_header*)calloc(index->scan_count,
+          sizeof(huffman_scan_header));
+  if (index->scan == NULL)
+    ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 0);
+  index->total_iMCU_rows = cinfo->total_iMCU_rows;
+  index->scan[0].offset = (huffman_offset_data**)calloc(cinfo->total_iMCU_rows,
+          sizeof(huffman_offset_data*));
+  if (index->scan[0].offset == NULL)
+    ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 0);
+  index->MCU_sample_size = DEFAULT_MCU_SAMPLE_SIZE;
+  index->mem_used = sizeof(huffman_scan_header)
+      + cinfo->total_iMCU_rows*sizeof(huffman_offset_data*);
+}
+/* Free every table in index (not index itself); each offset[j] is freed. */
+GLOBAL(void)
+jpeg_destroy_huffman_index(huffman_index *index)
+{
+    int i, j;
+    for (i = 0; i < index->scan_count; i++) {
+        for(j = 0; j < index->total_iMCU_rows; j++) {
+            free(index->scan[i].offset[j]);
+        }
+        free(index->scan[i].offset);
+    }
+    free(index->scan);
+}
diff --git a/jdhuff.h b/jdhuff.h
index ae19b6c..5760a13 100644
--- a/jdhuff.h
+++ b/jdhuff.h
@@ -71,6 +71,7 @@
 
 typedef INT32 bit_buf_type;	/* type of bit-extraction buffer */
 #define BIT_BUF_SIZE  32	/* size of buffer in bits */
+#define LOG_TWO_BIT_BUF_SIZE  5	/* log_2(BIT_BUF_SIZE) */
 
 /* If long is > 32 bits on your machine, and shifting/masking longs is
  * reasonably fast, making bit_buf_type be long and setting BIT_BUF_SIZE
diff --git a/jdinput.c b/jdinput.c
index 0c2ac8f..7f23f34 100644
--- a/jdinput.c
+++ b/jdinput.c
@@ -29,6 +29,8 @@
 
 /* Forward declarations */
 METHODDEF(int) consume_markers JPP((j_decompress_ptr cinfo));
+METHODDEF(int) consume_markers_with_huffman_index JPP((j_decompress_ptr cinfo,
+                    huffman_index *index, int current_scan));
 
 
 /*
@@ -116,7 +118,6 @@
     cinfo->inputctl->has_multiple_scans = FALSE;
 }
 
-
 LOCAL(void)
 per_scan_setup (j_decompress_ptr cinfo)
 /* Do computations that are needed before processing a JPEG scan */
@@ -194,6 +195,13 @@
   }
 }
 
+/* Exported wrapper so code outside this file can run per_scan_setup. */
+GLOBAL(void)
+jpeg_decompress_per_scan_setup(j_decompress_ptr cinfo)
+{
+    per_scan_setup(cinfo);
+}
+
 
 /*
  * Save away a copy of the Q-table referenced by each component present
@@ -258,6 +266,7 @@
   (*cinfo->entropy->start_pass) (cinfo);
   (*cinfo->coef->start_input_pass) (cinfo);
   cinfo->inputctl->consume_input = cinfo->coef->consume_data;
+  cinfo->inputctl->consume_input_with_huffman_index = cinfo->coef->consume_data_with_huffman_index;
 }
 
 
@@ -271,9 +280,15 @@
 finish_input_pass (j_decompress_ptr cinfo)
 {
   cinfo->inputctl->consume_input = consume_markers;
+  cinfo->inputctl->consume_input_with_huffman_index = consume_markers_with_huffman_index;
 }
 
 
+METHODDEF(int) consume_markers_with_huffman_index (j_decompress_ptr cinfo,
+                    huffman_index *index, int current_scan)
+{
+    return consume_markers(cinfo);  /* index and current_scan are unused here */
+}
 /*
  * Read JPEG markers before, between, or after compressed-data scans.
  * Change state as necessary when a new scan is reached.
@@ -341,6 +356,7 @@
   my_inputctl_ptr inputctl = (my_inputctl_ptr) cinfo->inputctl;
 
   inputctl->pub.consume_input = consume_markers;
+  inputctl->pub.consume_input_with_huffman_index = consume_markers_with_huffman_index;
   inputctl->pub.has_multiple_scans = FALSE; /* "unknown" would be better */
   inputctl->pub.eoi_reached = FALSE;
   inputctl->inheaders = TRUE;
@@ -372,6 +388,8 @@
   inputctl->pub.reset_input_controller = reset_input_controller;
   inputctl->pub.start_input_pass = start_input_pass;
   inputctl->pub.finish_input_pass = finish_input_pass;
+
+  inputctl->pub.consume_input_with_huffman_index = consume_markers_with_huffman_index;
   /* Initialize state: can't use reset_input_controller since we don't
    * want to try to reset other modules yet.
    */
diff --git a/jdmaster.c b/jdmaster.c
index 8925013..e44d662 100644
--- a/jdmaster.c
+++ b/jdmaster.c
@@ -103,8 +103,9 @@
 #endif
 
   /* Prevent application from calling me at wrong times */
-  if (cinfo->global_state != DSTATE_READY)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  // FIXME
+  //if (cinfo->global_state != DSTATE_READY)
+  //  ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
 
 #ifdef IDCT_SCALING_SUPPORTED
 
diff --git a/jdtrans.c b/jdtrans.c
index 6c0ab71..138a656 100644
--- a/jdtrans.c
+++ b/jdtrans.c
@@ -16,7 +16,7 @@
 
 
 /* Forward declarations */
-LOCAL(void) transdecode_master_selection JPP((j_decompress_ptr cinfo));
+LOCAL(void) transdecode_master_selection JPP((j_decompress_ptr cinfo, boolean need_full_buffer));
 
 
 /*
@@ -46,7 +46,7 @@
 {
   if (cinfo->global_state == DSTATE_READY) {
     /* First call: initialize active modules */
-    transdecode_master_selection(cinfo);
+    transdecode_master_selection(cinfo, TRUE);
     cinfo->global_state = DSTATE_RDCOEFS;
   }
   if (cinfo->global_state == DSTATE_RDCOEFS) {
@@ -55,20 +55,20 @@
       int retcode;
       /* Call progress monitor hook if present */
       if (cinfo->progress != NULL)
-	(*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+        (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
       /* Absorb some more input */
       retcode = (*cinfo->inputctl->consume_input) (cinfo);
       if (retcode == JPEG_SUSPENDED)
-	return NULL;
+        return NULL;
       if (retcode == JPEG_REACHED_EOI)
-	break;
+        break;
       /* Advance progress counter if appropriate */
       if (cinfo->progress != NULL &&
 	  (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) {
-	if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) {
+        if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) {
 	  /* startup underestimated number of scans; ratchet up one scan */
-	  cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows;
-	}
+        cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows;
+        }
       }
     }
     /* Set state so that jpeg_finish_decompress does the right thing */
@@ -87,6 +87,62 @@
   return NULL;			/* keep compiler happy */
 }
 
+/* Run the input through the indexing hook; returns FALSE when suspended. */
+GLOBAL(boolean)
+jpeg_build_huffman_index(j_decompress_ptr cinfo, huffman_index *index)
+{
+  if (cinfo->global_state == DSTATE_READY) {
+    /* First call: initialize active modules */
+    transdecode_master_selection(cinfo, FALSE);
+    cinfo->global_state = DSTATE_RDCOEFS;
+  }
+  if (cinfo->global_state == DSTATE_RDCOEFS) {
+    /* Consume input until the first scan completes or EOI is reached */
+    for (;;) {
+      int retcode;
+      /* Call progress monitor hook if present */
+      if (cinfo->progress != NULL)
+        (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+      /* Absorb some more input */
+      retcode = (*cinfo->inputctl->consume_input_with_huffman_index) (cinfo, index, 0);
+      if (retcode == JPEG_SUSPENDED)
+        return FALSE;
+      if (retcode == JPEG_REACHED_EOI)
+        break;
+      /*
+       * TODO
+       * Baseline has one scan only.
+       * If we reach scan complete the whole image is processed.
+       * Needs changing for progressive mode.
+       */
+      if (retcode == JPEG_SCAN_COMPLETED)
+        break;
+
+      /* Advance progress counter if appropriate */
+      if (cinfo->progress != NULL &&
+	  (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) {
+        if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) {
+	  /* startup underestimated number of scans; ratchet up one scan */
+        cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows;
+        }
+      }
+    }
+    /* Set state so that jpeg_finish_decompress does the right thing */
+    cinfo->global_state = DSTATE_STOPPING;
+  }
+  /* At this point we should be in state DSTATE_STOPPING if being used
+   * standalone, or in state DSTATE_BUFIMAGE if being invoked to get access
+   * to the coefficients during a full buffered-image-mode decompression.
+   */
+  if ((cinfo->global_state == DSTATE_STOPPING ||
+       cinfo->global_state == DSTATE_BUFIMAGE) && cinfo->buffered_image) {
+    return TRUE;
+  }
+  /* Oops, improper usage */
+  ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  return FALSE;			/* keep compiler happy */
+}
+
 
 /*
  * Master selection of decompression modules for transcoding.
@@ -94,7 +150,7 @@
  */
 
 LOCAL(void)
-transdecode_master_selection (j_decompress_ptr cinfo)
+transdecode_master_selection (j_decompress_ptr cinfo, boolean need_full_buffer)
 {
   /* This is effectively a buffered-image operation. */
   cinfo->buffered_image = TRUE;
@@ -109,12 +165,17 @@
 #else
       ERREXIT(cinfo, JERR_NOT_COMPILED);
 #endif
-    } else
+    } else {
+#ifdef ANDROID_TILE_BASED_DECODE
+      jinit_huff_decoder_no_data(cinfo);
+#else
       jinit_huff_decoder(cinfo);
+#endif
+    }
   }
 
   /* Always get a full-image coefficient buffer. */
-  jinit_d_coef_controller(cinfo, TRUE);
+  jinit_d_coef_controller(cinfo, need_full_buffer);
 
   /* We can now tell the memory manager to allocate virtual arrays. */
   (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo);
diff --git a/jpegint.h b/jpegint.h
index 95b00d4..138d27b 100644
--- a/jpegint.h
+++ b/jpegint.h
@@ -145,6 +145,8 @@
 /* Input control module */
 struct jpeg_input_controller {
   JMETHOD(int, consume_input, (j_decompress_ptr cinfo));
+  JMETHOD(int, consume_input_with_huffman_index, (j_decompress_ptr cinfo,
+                    huffman_index *index, int scan_count));
   JMETHOD(void, reset_input_controller, (j_decompress_ptr cinfo));
   JMETHOD(void, start_input_pass, (j_decompress_ptr cinfo));
   JMETHOD(void, finish_input_pass, (j_decompress_ptr cinfo));
@@ -166,6 +168,8 @@
 struct jpeg_d_coef_controller {
   JMETHOD(void, start_input_pass, (j_decompress_ptr cinfo));
   JMETHOD(int, consume_data, (j_decompress_ptr cinfo));
+  JMETHOD(int, consume_data_with_huffman_index, (j_decompress_ptr cinfo,
+                    huffman_index* index, int scan_count));
   JMETHOD(void, start_output_pass, (j_decompress_ptr cinfo));
   JMETHOD(int, decompress_data, (j_decompress_ptr cinfo,
 				 JSAMPIMAGE output_buf));
@@ -210,6 +214,7 @@
   JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
   JMETHOD(boolean, decode_mcu, (j_decompress_ptr cinfo,
 				JBLOCKROW *MCU_data));
+  JMETHOD(boolean, decode_mcu_discard_coef, (j_decompress_ptr cinfo));
 
   /* This is here to share code between baseline and progressive decoders; */
   /* other modules probably should not use it */
@@ -357,6 +362,7 @@
 EXTERN(void) jinit_input_controller JPP((j_decompress_ptr cinfo));
 EXTERN(void) jinit_marker_reader JPP((j_decompress_ptr cinfo));
 EXTERN(void) jinit_huff_decoder JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_huff_decoder_no_data JPP((j_decompress_ptr cinfo));
 EXTERN(void) jinit_phuff_decoder JPP((j_decompress_ptr cinfo));
 EXTERN(void) jinit_inverse_dct JPP((j_decompress_ptr cinfo));
 EXTERN(void) jinit_upsampler JPP((j_decompress_ptr cinfo));
@@ -364,6 +370,7 @@
 EXTERN(void) jinit_1pass_quantizer JPP((j_decompress_ptr cinfo));
 EXTERN(void) jinit_2pass_quantizer JPP((j_decompress_ptr cinfo));
 EXTERN(void) jinit_merged_upsampler JPP((j_decompress_ptr cinfo));
+EXTERN(void) jpeg_decompress_per_scan_setup (j_decompress_ptr cinfo);
 /* Memory manager initialization */
 EXTERN(void) jinit_memory_mgr JPP((j_common_ptr cinfo));
 
diff --git a/jpeglib.h b/jpeglib.h
index 0f3a547..430e2c0 100644
--- a/jpeglib.h
+++ b/jpeglib.h
@@ -633,6 +633,47 @@
   struct jpeg_color_quantizer * cquantize;
 };
 
+typedef struct {
+  // One saved decoder position: packed bit offset plus one DC value per scan component
+  // |--- byte_offset ---|- bit_left -|
+  //  \------ 27 -------/ \---- 5 ----/
+  unsigned int bitstream_offset;
+  short prev_dc[MAX_COMPS_IN_SCAN];
+} huffman_offset_data;
+
+typedef struct {
+  // Index data for one scan of the image
+  // The header starting position of this scan
+  unsigned int bitstream_offset;
+
+  // Number of components in this scan
+  int comps_in_scan;
+
+  // Number of MCUs in each row
+  int MCUs_per_row;
+  int MCU_rows_per_iMCU_row;
+  // offset[r] is the table of saved decoder states for iMCU row r
+  huffman_offset_data **offset;
+} huffman_scan_header;
+
+#define DEFAULT_MCU_SAMPLE_SIZE 16
+
+typedef struct {
+  // Top-level index of saved Huffman decoder positions, one header per scan
+  // The number of MCUs that we sample each time as an index point
+  int MCU_sample_size;
+
+  // Number of scans in this image
+  int scan_count;
+
+  // Number of iMCU rows in this image
+  int total_iMCU_rows;
+
+  // Running total of bytes allocated for the scan headers and offset tables
+  size_t mem_used;
+  huffman_scan_header *scan;  // array of scan_count headers
+} huffman_index;
+
 
 /* "Object" declarations for JPEG modules that may be supplied or called
  * directly by the surrounding application.
@@ -728,7 +769,9 @@
 
 struct jpeg_source_mgr {
   const JOCTET * next_input_byte; /* => next byte to read from buffer */
+  const JOCTET * start_input_byte; /* => first byte to read from input */
   size_t bytes_in_buffer;	/* # of bytes remaining in buffer */
+  size_t total_byte; /* # of bytes in input */
 
   JMETHOD(void, init_source, (j_decompress_ptr cinfo));
   JMETHOD(boolean, fill_input_buffer, (j_decompress_ptr cinfo));
@@ -980,6 +1023,19 @@
 EXTERN(JDIMENSION) jpeg_read_scanlines JPP((j_decompress_ptr cinfo,
 					    JSAMPARRAY scanlines,
 					    JDIMENSION max_lines));
+EXTERN(JDIMENSION) jpeg_read_scanlines_from JPP((j_decompress_ptr cinfo,
+					    JSAMPARRAY scanlines,
+					    int line_offset,
+					    JDIMENSION max_lines));
+EXTERN(JDIMENSION) jpeg_read_tile_scanline JPP((j_decompress_ptr cinfo,
+                        huffman_index *index,
+                        JSAMPARRAY scanlines,
+		                int start_x, int start_y,
+                        int width, int height));
+EXTERN(void) jpeg_init_read_tile_scanline JPP((j_decompress_ptr cinfo,
+                        huffman_index *index,
+		                int *start_x, int *start_y,
+                        int *width, int *height));
 EXTERN(boolean) jpeg_finish_decompress JPP((j_decompress_ptr cinfo));
 
 /* Replaces jpeg_read_scanlines when reading raw downsampled data. */
@@ -1017,6 +1073,7 @@
 
 /* Read or write raw DCT coefficients --- useful for lossless transcoding. */
 EXTERN(jvirt_barray_ptr *) jpeg_read_coefficients JPP((j_decompress_ptr cinfo));
+EXTERN(boolean) jpeg_build_huffman_index JPP((j_decompress_ptr cinfo, huffman_index *index));
 EXTERN(void) jpeg_write_coefficients JPP((j_compress_ptr cinfo,
 					  jvirt_barray_ptr * coef_arrays));
 EXTERN(void) jpeg_copy_critical_parameters JPP((j_decompress_ptr srcinfo,
@@ -1041,6 +1098,14 @@
 EXTERN(boolean) jpeg_resync_to_restart JPP((j_decompress_ptr cinfo,
 					    int desired));
 
+EXTERN(void) jpeg_configure_huffman_decoder(j_decompress_ptr cinfo,
+                        unsigned int bitstream_offset, short int *dc_info);
+EXTERN(void) jpeg_get_huffman_decoder_configuration(j_decompress_ptr cinfo,
+                        unsigned int *bitstream_offset, short int *dc_info);
+EXTERN(void) jpeg_create_huffman_index(j_decompress_ptr cinfo,
+                        huffman_index *index);
+EXTERN(void) jpeg_destroy_huffman_index(huffman_index *index);
+
 
 /* These marker codes are exported since applications and data source modules
  * are likely to want to use them.