Some cleanups and fixes.

Separates the transform type selection logic, previously spread out
over a number of files, into a separate function. Currently the tx_type
field in b_mode_info is not used, but it is left in place so that it
can eventually be used to signal the transform type in the bitstream.

Also, for tx_type = DCT_DCT, the regular integer DCT is now used, as
opposed to the floating-point DCT used in conjunction with the hybrid
transform.

Results change somewhat due to the transform change, but are within
reasonable limits. The hd/std-hd sets are slightly up, while derf/yt
are slightly down.

Change-Id: I5776840c2239ca2da31ca6cfd7fd1148dc5f9e0f
diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index 092d9ff..10e7d6d 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -151,14 +151,6 @@
 
 #define VP8_MVREFS (1 + SPLITMV - NEARESTMV)
 
-#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
-#define ACTIVE_HT 110                // quantization stepsize threshold
-#endif
-
-#if CONFIG_HYBRIDTRANSFORM16X16
-#define ACTIVE_HT16 300
-#endif
-
 typedef enum {
   B_DC_PRED,          /* average of above and left pixels */
   B_TM_PRED,
@@ -182,50 +174,6 @@
   B_MODE_COUNT
 } B_PREDICTION_MODE;
 
-#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16
-// convert MB_PREDICTION_MODE to B_PREDICTION_MODE
-static B_PREDICTION_MODE pred_mode_conv(MB_PREDICTION_MODE mode) {
-  B_PREDICTION_MODE b_mode;
-  switch (mode) {
-    case DC_PRED:
-      b_mode = B_DC_PRED;
-      break;
-    case V_PRED:
-      b_mode = B_VE_PRED;
-      break;
-    case H_PRED:
-      b_mode = B_HE_PRED;
-      break;
-    case TM_PRED:
-      b_mode = B_TM_PRED;
-      break;
-    case D45_PRED:
-      b_mode = B_LD_PRED;
-      break;
-    case D135_PRED:
-      b_mode = B_RD_PRED;
-      break;
-    case D117_PRED:
-      b_mode = B_VR_PRED;
-      break;
-    case D153_PRED:
-      b_mode = B_HD_PRED;
-      break;
-    case D27_PRED:
-      b_mode = B_VL_PRED;
-      break;
-    case D63_PRED:
-      b_mode = B_HU_PRED;
-      break;
-    default :
-      // for debug purpose, to be removed after full testing
-      assert(0);
-      break;
-  }
-  return b_mode;
-}
-#endif
-
 #define VP8_BINTRAMODES (B_HU_PRED + 1)  /* 10 */
 #define VP8_SUBMVREFS (1 + NEW4X4 - LEFT4X4)
 
@@ -438,68 +386,150 @@
   int_mv ref_mv[MAX_MV_REFS];
 #endif
 
-#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16
   int q_index;
-#endif
 
 } MACROBLOCKD;
 
+#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
+#define ACTIVE_HT 110                // quantization stepsize threshold
+#endif
+
+#if CONFIG_HYBRIDTRANSFORM8X8
+#define ACTIVE_HT8 300
+#endif
+
+#if CONFIG_HYBRIDTRANSFORM16X16
+#define ACTIVE_HT16 300
+#endif
+
+#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16
+// convert MB_PREDICTION_MODE to B_PREDICTION_MODE
+static B_PREDICTION_MODE pred_mode_conv(MB_PREDICTION_MODE mode) {
+  B_PREDICTION_MODE b_mode;
+  switch (mode) {
+    case DC_PRED:
+      b_mode = B_DC_PRED;
+      break;
+    case V_PRED:
+      b_mode = B_VE_PRED;
+      break;
+    case H_PRED:
+      b_mode = B_HE_PRED;
+      break;
+    case TM_PRED:
+      b_mode = B_TM_PRED;
+      break;
+    case D45_PRED:
+      b_mode = B_LD_PRED;
+      break;
+    case D135_PRED:
+      b_mode = B_RD_PRED;
+      break;
+    case D117_PRED:
+      b_mode = B_VR_PRED;
+      break;
+    case D153_PRED:
+      b_mode = B_HD_PRED;
+      break;
+    case D27_PRED:
+      b_mode = B_HU_PRED;
+      break;
+    case D63_PRED:
+      b_mode = B_VL_PRED;
+      break;
+    default :
+      // for debug purpose, to be removed after full testing
+      assert(0);
+      break;
+  }
+  return b_mode;
+}
+#endif
+
 #if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16
 // transform mapping
-static void txfm_map(BLOCKD *b, B_PREDICTION_MODE bmode) {
+static TX_TYPE txfm_map(B_PREDICTION_MODE bmode) {
   // map transform type
+  TX_TYPE tx_type;
   switch (bmode) {
     case B_TM_PRED :
     case B_RD_PRED :
-      b->bmi.as_mode.tx_type = ADST_ADST;
+      tx_type = ADST_ADST;
       break;
 
     case B_VE_PRED :
     case B_VR_PRED :
-      b->bmi.as_mode.tx_type = ADST_DCT;
+      tx_type = ADST_DCT;
       break;
 
     case B_HE_PRED :
     case B_HD_PRED :
     case B_HU_PRED :
-      b->bmi.as_mode.tx_type = DCT_ADST;
+      tx_type = DCT_ADST;
       break;
 
     default :
-      b->bmi.as_mode.tx_type = DCT_DCT;
+      tx_type = DCT_DCT;
       break;
   }
+  return tx_type;
 }
+#endif
 
-static TX_TYPE get_tx_type(MACROBLOCKD *xd, const BLOCKD *b) {
+#if CONFIG_HYBRIDTRANSFORM
+static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, const BLOCKD *b) {
+  TX_TYPE tx_type = DCT_DCT;
+  if (xd->mode_info_context->mbmi.mode == B_PRED &&
+      xd->q_index < ACTIVE_HT) {
+    tx_type = txfm_map(b->bmi.as_mode.first);
+  }
+  return tx_type;
+}
+#endif
+
+#if CONFIG_HYBRIDTRANSFORM8X8
+static TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd, const BLOCKD *b) {
+  TX_TYPE tx_type = DCT_DCT;
+  if (xd->mode_info_context->mbmi.mode == I8X8_PRED &&
+      xd->q_index < ACTIVE_HT8) {
+    tx_type = txfm_map(pred_mode_conv(b->bmi.as_mode.first));
+  }
+  return tx_type;
+}
+#endif
+
+#if CONFIG_HYBRIDTRANSFORM16X16
+static TX_TYPE get_tx_type_16x16(const MACROBLOCKD *xd, const BLOCKD *b) {
+  TX_TYPE tx_type = DCT_DCT;
+  if (xd->mode_info_context->mbmi.mode < I8X8_PRED &&
+      xd->q_index < ACTIVE_HT16) {
+    tx_type = txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode));
+  }
+  return tx_type;
+}
+#endif
+
+#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || \
+    CONFIG_HYBRIDTRANSFORM16X16
+static TX_TYPE get_tx_type(const MACROBLOCKD *xd, const BLOCKD *b) {
   TX_TYPE tx_type = DCT_DCT;
   int ib = (b - xd->block);
-  if (ib >= 16) return tx_type;
+  if (ib >= 16)
+    return tx_type;
 #if CONFIG_HYBRIDTRANSFORM16X16
   if (xd->mode_info_context->mbmi.txfm_size == TX_16X16) {
-    if (xd->mode_info_context->mbmi.mode < I8X8_PRED &&
-        xd->q_index < ACTIVE_HT16)
-      tx_type = b->bmi.as_mode.tx_type;
-    return tx_type;
+    tx_type = get_tx_type_16x16(xd, b);
   }
 #endif
 #if CONFIG_HYBRIDTRANSFORM8X8
   if (xd->mode_info_context->mbmi.txfm_size  == TX_8X8) {
-    BLOCKD *bb;
     ib = (ib & 8) + ((ib & 4) >> 1);
-    bb = xd->block + ib;
-    if (xd->mode_info_context->mbmi.mode == I8X8_PRED)
-      tx_type = bb->bmi.as_mode.tx_type;
-    return tx_type;
+    tx_type = get_tx_type_8x8(xd, &xd->block[ib]);
   }
 #endif
 #if CONFIG_HYBRIDTRANSFORM
   if (xd->mode_info_context->mbmi.txfm_size  == TX_4X4) {
-    if (xd->mode_info_context->mbmi.mode == B_PRED &&
-        xd->q_index < ACTIVE_HT) {
-      tx_type = b->bmi.as_mode.tx_type;
-    }
-    return tx_type;
+    tx_type = get_tx_type_4x4(xd, b);
   }
 #endif
   return tx_type;
diff --git a/vp8/common/idctllm.c b/vp8/common/idctllm.c
index 7b3ac36..d705fec 100644
--- a/vp8/common/idctllm.c
+++ b/vp8/common/idctllm.c
@@ -189,6 +189,7 @@
     // pointers to vertical and horizontal transforms
     float *ptv, *pth;
 
+    assert(tx_type != DCT_DCT);
     // load and convert residual array into floating-point
     for(j = 0; j < tx_dim; j++) {
       for(i = 0; i < tx_dim; i++) {
diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c
index 29b689f..7006c0c 100644
--- a/vp8/decoder/decodemv.c
+++ b/vp8/decoder/decodemv.c
@@ -1234,6 +1234,14 @@
 #endif
       do {
         mi->bmi[j].as_mode.first = (B_PREDICTION_MODE)vp8_read_bmode(bc, pbi->common.fc.bmode_prob);
+        /*
+        {
+          int p;
+          for (p = 0; p < VP8_BINTRAMODES - 1; ++p)
+            printf(" %d", pbi->common.fc.bmode_prob[p]);
+          printf("\nbmode[%d][%d]: %d\n", pbi->common.current_video_frame, j, mi->bmi[j].as_mode.first);
+        }
+        */
         pbi->common.fc.bmode_counts[mi->bmi[j].as_mode.first]++;
 #if CONFIG_COMP_INTRA_PRED
         if (use_comp_pred) {
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 30878cf..049cac7 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -115,17 +115,13 @@
     }
   } else
     QIndex = pc->base_qindex;
+  xd->q_index = QIndex;
 
   /* Set up the block level dequant pointers */
   for (i = 0; i < 16; i++) {
     xd->block[i].dequant = pc->Y1dequant[QIndex];
   }
 
-#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16
-  xd->q_index = QIndex;
-#endif
-
-
 #if CONFIG_LOSSLESS
   if (!QIndex) {
     pbi->common.rtcd.idct.idct1        = vp8_short_inv_walsh4x4_1_x8_c;
@@ -210,21 +206,16 @@
   int eobtotal = 0;
   MB_PREDICTION_MODE mode;
   int i;
-  int tx_type;
+  int tx_size;
+#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || \
+    CONFIG_HYBRIDTRANSFORM16X16
+  TX_TYPE tx_type;
+#endif
 #if CONFIG_SUPERBLOCKS
   VP8_COMMON *pc = &pbi->common;
   int orig_skip_flag = xd->mode_info_context->mbmi.mb_skip_coeff;
 #endif
 
-#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
-  int QIndex;
-  int active_ht;
-#endif
-
-#if CONFIG_HYBRIDTRANSFORM16X16
-  int active_ht16;
-#endif
-
   // re-initialize macroblock dequantizer before detokenization
   if (xd->segmentation_enabled)
     mb_init_dequantizer(pbi, xd);
@@ -235,43 +226,9 @@
   }
 #endif
 
-  tx_type = xd->mode_info_context->mbmi.txfm_size;
+  tx_size = xd->mode_info_context->mbmi.txfm_size;
   mode = xd->mode_info_context->mbmi.mode;
 
-#if CONFIG_HYBRIDTRANSFORM
-  // parse transform types for intra 4x4 mode
-  QIndex = xd->q_index;
-  active_ht = (QIndex < ACTIVE_HT);
-  if (mode == B_PRED) {
-    for (i = 0; i < 16; i++) {
-      BLOCKD *b = &xd->block[i];
-      int b_mode = xd->mode_info_context->bmi[i].as_mode.first;
-      if(active_ht)
-        txfm_map(b, b_mode);
-    } // loop over 4x4 blocks
-  }
-#endif
-
-#if CONFIG_HYBRIDTRANSFORM8X8
-  if (mode == I8X8_PRED) {
-    for (i = 0; i < 4; i++) {
-      int ib = vp8_i8x8_block[i];
-      BLOCKD *b = &xd->block[ib];
-      int i8x8mode = b->bmi.as_mode.first;
-      txfm_map(b, pred_mode_conv(i8x8mode));
-    }
-  }
-#endif
-
-#if CONFIG_HYBRIDTRANSFORM16X16
-  active_ht16 = (QIndex < ACTIVE_HT16);
-  if (mode < I8X8_PRED) {
-    BLOCKD *b = &xd->block[0];
-    if(active_ht16)
-      txfm_map(b, pred_mode_conv(mode));
-  }
-#endif
-
   if (xd->mode_info_context->mbmi.mb_skip_coeff) {
     vp8_reset_mb_tokens_context(xd);
 #if CONFIG_SUPERBLOCKS
@@ -288,12 +245,13 @@
       xd->block[i].eob = 0;
       xd->eobs[i] = 0;
     }
-    if (tx_type == TX_16X16)
+    if (tx_size == TX_16X16) {
       eobtotal = vp8_decode_mb_tokens_16x16(pbi, xd);
-    else if (tx_type == TX_8X8)
+    } else if (tx_size == TX_8X8) {
       eobtotal = vp8_decode_mb_tokens_8x8(pbi, xd);
-    else
+    } else {
       eobtotal = vp8_decode_mb_tokens(pbi, xd);
+    }
   }
 
   //mode = xd->mode_info_context->mbmi.mode;
@@ -380,13 +338,17 @@
 
       if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) {
 #if CONFIG_HYBRIDTRANSFORM8X8
-        vp8_ht_dequant_idct_add_8x8_c(b->bmi.as_mode.tx_type,
-                                      q, dq, pre, dst, 16, stride);
-        q += 64;
+        tx_type = get_tx_type(xd, &xd->block[idx]);
+        if (tx_type != DCT_DCT) {
+          vp8_ht_dequant_idct_add_8x8_c(tx_type,
+                                        q, dq, pre, dst, 16, stride);
+        } else {
+          vp8_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride);
+        }
 #else
         vp8_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride);
-        q += 64;
 #endif
+        q += 64;
       } else {
         for (j = 0; j < 4; j++) {
           b = &xd->block[ib + iblock[j]];
@@ -396,7 +358,7 @@
       }
 
       b = &xd->block[16 + i];
-	  vp8_intra_uv4x4_predict(b, i8x8mode, b->predictor);
+      vp8_intra_uv4x4_predict(b, i8x8mode, b->predictor);
       DEQUANT_INVOKE(&pbi->dequant, idct_add)(b->qcoeff, b->dequant,
                                               b->predictor,
                                               *(b->base_dst) + b->dst, 8,
@@ -425,16 +387,17 @@
 #endif
 
 #if CONFIG_HYBRIDTRANSFORM
-      if(active_ht)
-        vp8_ht_dequant_idct_add_c( (TX_TYPE)b->bmi.as_mode.tx_type, b->qcoeff,
-                                   b->dequant, b->predictor,
-                                   *(b->base_dst) + b->dst, 16, b->dst_stride);
-      else
+      tx_type = get_tx_type(xd, b);
+      if (tx_type != DCT_DCT) {
+        vp8_ht_dequant_idct_add_c(tx_type, b->qcoeff,
+                                  b->dequant, b->predictor,
+                                  *(b->base_dst) + b->dst, 16, b->dst_stride);
+      } else {
         vp8_dequant_idct_add_c(b->qcoeff, b->dequant, b->predictor,
                                *(b->base_dst) + b->dst, 16, b->dst_stride);
+      }
 #else
-      if (xd->eobs[i] > 1)
-      {
+      if (xd->eobs[i] > 1) {
         DEQUANT_INVOKE(&pbi->dequant, idct_add)
             (b->qcoeff, b->dequant,  b->predictor,
              *(b->base_dst) + b->dst, 16, b->dst_stride);
@@ -454,15 +417,12 @@
   } else {
     BLOCKD *b = &xd->block[24];
 
-    if (tx_type == TX_16X16) {
+    if (tx_size == TX_16X16) {
 #if CONFIG_HYBRIDTRANSFORM16X16
-      if (mode < I8X8_PRED && active_ht16) {
-        BLOCKD *bd = &xd->block[0];
-        TX_TYPE txfm;
-        txfm_map(bd, pred_mode_conv(mode));
-        txfm = bd->bmi.as_mode.tx_type;
-
-        vp8_ht_dequant_idct_add_16x16_c(txfm, xd->qcoeff,
+      BLOCKD *bd = &xd->block[0];
+      tx_type = get_tx_type(xd, bd);
+      if (tx_type != DCT_DCT) {
+        vp8_ht_dequant_idct_add_16x16_c(tx_type, xd->qcoeff,
                                         xd->block[0].dequant, xd->predictor,
                                         xd->dst.y_buffer, 16, xd->dst.y_stride);
       } else {
@@ -475,8 +435,7 @@
                                    xd->predictor, xd->dst.y_buffer,
                                    16, xd->dst.y_stride);
 #endif
-    }
-    else if (tx_type == TX_8X8) {
+    } else if (tx_size == TX_8X8) {
 #if CONFIG_SUPERBLOCKS
       void *orig = xd->mode_info_context;
       int n, num = xd->mode_info_context->mbmi.encoded_as_sb ? 4 : 1;
@@ -561,10 +520,9 @@
 #if CONFIG_SUPERBLOCKS
   if (!xd->mode_info_context->mbmi.encoded_as_sb) {
 #endif
-    if ((tx_type == TX_8X8 &&
+    if ((tx_size == TX_8X8 &&
          xd->mode_info_context->mbmi.mode != I8X8_PRED)
-        || tx_type == TX_16X16
-       )
+        || tx_size == TX_16X16)
       DEQUANT_INVOKE(&pbi->dequant, idct_add_uv_block_8x8) //
           (xd->qcoeff + 16 * 16, xd->block[16].dequant,
            xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer,
diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c
index 5b5ec7e..4ca7d86 100644
--- a/vp8/decoder/detokenize.c
+++ b/vp8/decoder/detokenize.c
@@ -578,8 +578,7 @@
     if (i == 16)
       type = PLANE_TYPE_UV;
 #if CONFIG_HYBRIDTRANSFORM8X8
-    if (type == PLANE_TYPE_Y_WITH_DC &&
-        xd->mode_info_context->mbmi.mode == I8X8_PRED) {
+    if (type == PLANE_TYPE_Y_WITH_DC) {
       tx_type = get_tx_type(xd, xd->block + i);
     }
 #endif
@@ -673,10 +672,7 @@
       type = PLANE_TYPE_UV;
 
 #if CONFIG_HYBRIDTRANSFORM
-    if (type == PLANE_TYPE_Y_WITH_DC)
-      tx_type = get_tx_type(xd, &xd->block[i]);
-#endif
-#if CONFIG_HYBRIDTRANSFORM
+    tx_type = get_tx_type(xd, &xd->block[i]);
     switch(tx_type) {
       case ADST_DCT :
         scan = vp8_row_scan;
diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c
index 2657fd2..bd5def6 100644
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -996,6 +996,14 @@
 #endif
               write_bmode(w, m->bmi[j].as_mode.first,
                           pc->fc.bmode_prob);
+              /*
+              if (!cpi->dummy_packing) {
+                int p;
+                for (p = 0; p < VP8_BINTRAMODES - 1; ++p)
+                  printf(" %d", pc->fc.bmode_prob[p]);
+                printf("\nbmode[%d][%d]: %d\n", pc->current_video_frame, j, m->bmi[j].as_mode.first);
+              }
+              */
 #if CONFIG_COMP_INTRA_PRED
               if (uses_second) {
                 write_bmode(w, mode2, pc->fc.bmode_prob);
diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h
index 8617004..80f9b75 100644
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -170,7 +170,6 @@
 #endif
 
   int optimize;
-  int q_index;
 
   // Structure to hold context for each of the 4 MBs within a SB:
   // when encoded as 4 independent MBs:
diff --git a/vp8/encoder/dct.c b/vp8/encoder/dct.c
index d81a547..cd13fec 100644
--- a/vp8/encoder/dct.c
+++ b/vp8/encoder/dct.c
@@ -419,6 +419,7 @@
     // pointers to vertical and horizontal transforms
     float *ptv, *pth;
 
+    assert(tx_type != DCT_DCT);
     // load and convert residual array into floating-point
     for(j = 0; j < tx_dim; j++) {
       for(i = 0; i < tx_dim; i++) {
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index 0a25f8e..c92ec02 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -1061,7 +1061,7 @@
       x->partition_info     += 2;
       xd->mode_info_context += 2;
       xd->prev_mode_info_context += 2;
-      
+
       (*tp)->Token = EOSB_TOKEN;
       (*tp)++;
       if (mb_row < cm->mb_rows) cpi->tplist[mb_row].stop = *tp;
@@ -1931,7 +1931,7 @@
     update_sb_skip_coeff_state(cpi, x, ta, tl, tp, t, skip);
   }
 }
-#endif
+#endif /* CONFIG_SUPERBLOCKS */
 
 void vp8cx_encode_intra_macro_block(VP8_COMP *cpi,
                                     MACROBLOCK *x,
@@ -1942,7 +1942,6 @@
     adjust_act_zbin(cpi, x);
     vp8_update_zbin_extra(cpi, x);
   }
-
   if (mbmi->mode == I8X8_PRED) {
     vp8_encode_intra8x8mby(IF_RTCD(&cpi->rtcd), x);
     vp8_encode_intra8x8mbuv(IF_RTCD(&cpi->rtcd), x);
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index 703a101..9ff39c6 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -57,6 +57,9 @@
                               MACROBLOCK *x, int ib) {
   BLOCKD *b = &x->e_mbd.block[ib];
   BLOCK *be = &x->block[ib];
+#if CONFIG_HYBRIDTRANSFORM
+  TX_TYPE tx_type;
+#endif
 
 #if CONFIG_COMP_INTRA_PRED
   if (b->bmi.as_mode.second == (B_PREDICTION_MODE)(B_DC_PRED - 1)) {
@@ -72,11 +75,11 @@
   ENCODEMB_INVOKE(&rtcd->encodemb, subb)(be, b, 16);
 
 #if CONFIG_HYBRIDTRANSFORM
-  if (x->q_index < ACTIVE_HT) {
-    txfm_map(b, b->bmi.as_mode.first);
-    vp8_fht_c(be->src_diff, be->coeff, 32, b->bmi.as_mode.tx_type, 4);
-    vp8_ht_quantize_b_4x4(be, b);
-    vp8_ihtllm_c(b->dqcoeff, b->diff, 32, b->bmi.as_mode.tx_type, 4);
+  tx_type = get_tx_type(&x->e_mbd, b);
+  if (tx_type != DCT_DCT) {
+    vp8_fht_c(be->src_diff, be->coeff, 32, tx_type, 4);
+    vp8_ht_quantize_b_4x4(be, b, tx_type);
+    vp8_ihtllm_c(b->dqcoeff, b->diff, 32, tx_type, 4);
   } else
 #endif
   {
@@ -91,12 +94,6 @@
 void vp8_encode_intra4x4mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *mb) {
   int i;
 
-#if 0
-  MACROBLOCKD *xd = &mb->e_mbd;
-  // Intra modes requiring top-right MB reconstructed data have been disabled
-  vp8_intra_prediction_down_copy(xd);
-#endif
-
   for (i = 0; i < 16; i++)
     vp8_encode_intra4x4block(rtcd, mb, i);
   return;
@@ -107,7 +104,7 @@
   BLOCK *b = &x->block[0];
   TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size;
 #if CONFIG_HYBRIDTRANSFORM16X16
-  TX_TYPE txfm_type = xd->mode_info_context->bmi[0].as_mode.tx_type;
+  TX_TYPE tx_type;
 #endif
 
 #if CONFIG_COMP_INTRA_PRED
@@ -124,16 +121,14 @@
 
   if (tx_size == TX_16X16) {
 #if CONFIG_HYBRIDTRANSFORM16X16
-    if ((xd->mode_info_context->mbmi.mode < I8X8_PRED) &&
-        (x->q_index < ACTIVE_HT16)) {
-      BLOCKD  *bd = &xd->block[0];
-      txfm_map(bd, pred_mode_conv(xd->mode_info_context->mbmi.mode));
-      txfm_type = bd->bmi.as_mode.tx_type;
-      vp8_fht_c(b->src_diff, b->coeff, 32, txfm_type, 16);
+    BLOCKD  *bd = &xd->block[0];
+    tx_type = get_tx_type(xd, bd);
+    if (tx_type != DCT_DCT) {
+      vp8_fht_c(b->src_diff, b->coeff, 32, tx_type, 16);
       vp8_quantize_mby_16x16(x);
       if (x->optimize)
         vp8_optimize_mby_16x16(x, rtcd);
-      vp8_ihtllm_c(bd->dqcoeff, bd->diff, 32, txfm_type, 16);
+      vp8_ihtllm_c(bd->dqcoeff, bd->diff, 32, tx_type, 16);
     } else
 #endif
     {
@@ -201,6 +196,9 @@
   BLOCK *be = &x->block[ib];
   const int iblock[4] = {0, 1, 4, 5};
   int i;
+#if CONFIG_HYBRIDTRANSFORM8X8
+  TX_TYPE tx_type;
+#endif
 
 #if CONFIG_COMP_INTRA_PRED
   if (b->bmi.as_mode.second == (MB_PREDICTION_MODE)(DC_PRED - 1)) {
@@ -220,16 +218,20 @@
     vp8_subtract_4b_c(be, b, 16);
 
 #if CONFIG_HYBRIDTRANSFORM8X8
-    txfm_map(b, pred_mode_conv(b->bmi.as_mode.first));
-    vp8_fht_c(be->src_diff, (x->block + idx)->coeff, 32,
-              b->bmi.as_mode.tx_type, 8);
-    x->quantize_b_8x8(x->block + idx, xd->block + idx);
-    vp8_ihtllm_c(xd->block[idx].dqcoeff, xd->block[ib].diff, 32,
-                 b->bmi.as_mode.tx_type, 8);
-#else
-    x->vp8_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
-    x->quantize_b_8x8(x->block + idx, xd->block + idx);
-    vp8_idct_idct8(xd->block[idx].dqcoeff, xd->block[ib].diff, 32);
+    tx_type = get_tx_type(xd, &xd->block[idx]);
+    if (tx_type != DCT_DCT) {
+      vp8_fht_c(be->src_diff, (x->block + idx)->coeff, 32,
+                tx_type, 8);
+      x->quantize_b_8x8(x->block + idx, xd->block + idx);
+      vp8_ihtllm_c(xd->block[idx].dqcoeff, xd->block[ib].diff, 32,
+                   tx_type, 8);
+    } else {
+#endif
+      x->vp8_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
+      x->quantize_b_8x8(x->block + idx, xd->block + idx);
+      vp8_idct_idct8(xd->block[idx].dqcoeff, xd->block[ib].diff, 32);
+#if CONFIG_HYBRIDTRANSFORM8X8
+    }
 #endif
   } else {
     for (i = 0; i < 4; i++) {
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index edc6c2e..dc54d05 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -267,7 +267,7 @@
 
 void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type,
                 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
-                const VP8_ENCODER_RTCD *rtcd, int tx_type) {
+                const VP8_ENCODER_RTCD *rtcd, int tx_size) {
   BLOCK *b;
   BLOCKD *d;
   vp8_token_state tokens[65][2];
@@ -298,7 +298,7 @@
 
   b = &mb->block[i];
   d = &mb->e_mbd.block[i];
-  switch (tx_type) {
+  switch (tx_size) {
     default:
     case TX_4X4:
       scan = vp8_default_zig_zag1d;
@@ -308,11 +308,9 @@
       // TODO: this isn't called (for intra4x4 modes), but will be left in
       // since it could be used later
       {
-        int active_ht = (mb->q_index < ACTIVE_HT) &&
-                        (mb->e_mbd.mode_info_context->mbmi.mode == B_PRED);
-
-        if((type == PLANE_TYPE_Y_WITH_DC) && active_ht) {
-          switch (d->bmi.as_mode.tx_type) {
+        TX_TYPE tx_type = get_tx_type(&mb->e_mbd, d);
+        if (tx_type != DCT_DCT) {
+          switch (tx_type) {
             case ADST_DCT:
               scan = vp8_row_scan;
               break;
@@ -325,9 +323,9 @@
               scan = vp8_default_zig_zag1d;
               break;
           }
-
-        } else
+        } else {
           scan = vp8_default_zig_zag1d;
+        }
       }
 #endif
       break;
@@ -380,9 +378,9 @@
         band = bands[i + 1];
         pt = vp8_prev_token_class[t0];
         rate0 +=
-          mb->token_costs[tx_type][type][band][pt][tokens[next][0].token];
+          mb->token_costs[tx_size][type][band][pt][tokens[next][0].token];
         rate1 +=
-          mb->token_costs[tx_type][type][band][pt][tokens[next][1].token];
+          mb->token_costs[tx_size][type][band][pt][tokens[next][1].token];
       }
       UPDATE_RD_COST();
       /* And pick the best. */
@@ -427,12 +425,12 @@
         band = bands[i + 1];
         if (t0 != DCT_EOB_TOKEN) {
           pt = vp8_prev_token_class[t0];
-          rate0 += mb->token_costs[tx_type][type][band][pt][
+          rate0 += mb->token_costs[tx_size][type][band][pt][
               tokens[next][0].token];
         }
         if (t1 != DCT_EOB_TOKEN) {
           pt = vp8_prev_token_class[t1];
-          rate1 += mb->token_costs[tx_type][type][band][pt][
+          rate1 += mb->token_costs[tx_size][type][band][pt][
               tokens[next][1].token];
         }
       }
@@ -464,11 +462,11 @@
       t1 = tokens[next][1].token;
       /* Update the cost of each path if we're past the EOB token. */
       if (t0 != DCT_EOB_TOKEN) {
-        tokens[next][0].rate += mb->token_costs[tx_type][type][band][0][t0];
+        tokens[next][0].rate += mb->token_costs[tx_size][type][band][0][t0];
         tokens[next][0].token = ZERO_TOKEN;
       }
       if (t1 != DCT_EOB_TOKEN) {
-        tokens[next][1].rate += mb->token_costs[tx_type][type][band][0][t1];
+        tokens[next][1].rate += mb->token_costs[tx_size][type][band][0][t1];
         tokens[next][1].token = ZERO_TOKEN;
       }
       /* Don't update next, because we didn't add a new node. */
@@ -484,8 +482,8 @@
   error1 = tokens[next][1].error;
   t0 = tokens[next][0].token;
   t1 = tokens[next][1].token;
-  rate0 += mb->token_costs[tx_type][type][band][pt][t0];
-  rate1 += mb->token_costs[tx_type][type][band][pt][t1];
+  rate0 += mb->token_costs[tx_size][type][band][pt][t0];
+  rate1 += mb->token_costs[tx_size][type][band][pt][t1];
   UPDATE_RD_COST();
   best = rd_cost1 < rd_cost0;
   final_eob = i0 - 1;
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index cf9989d..14e9e78 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -1363,7 +1363,8 @@
 
   vpx_free(cpi->tplist);
 
-  CHECK_MEM_ERROR(cpi->tplist, vpx_malloc(sizeof(TOKENLIST) * cpi->common.mb_rows));
+  CHECK_MEM_ERROR(cpi->tplist,
+                  vpx_malloc(sizeof(TOKENLIST) * (cpi->common.mb_rows)));
 }
 
 
diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c
index 8ae3029..b6a1f27 100644
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -22,7 +22,7 @@
 #endif
 
 #if CONFIG_HYBRIDTRANSFORM
-void vp8_ht_quantize_b_4x4(BLOCK *b, BLOCKD *d) {
+void vp8_ht_quantize_b_4x4(BLOCK *b, BLOCKD *d, TX_TYPE tx_type) {
   int i, rc, eob;
   int zbin;
   int x, y, z, sz;
@@ -39,7 +39,7 @@
 
   int const *pt_scan ;
 
-  switch(d->bmi.as_mode.tx_type) {
+  switch (tx_type) {
     case ADST_DCT :
       pt_scan = vp8_row_scan;
       break;
@@ -653,12 +653,12 @@
   }
 
   /* save this macroblock QIndex for vp8_update_zbin_extra() */
-  x->q_index = QIndex;
+  x->e_mbd.q_index = QIndex;
 }
 
 void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x) {
   int i;
-  int QIndex = x->q_index;
+  int QIndex = x->e_mbd.q_index;
   int zbin_extra;
 
   // Y
diff --git a/vp8/encoder/quantize.h b/vp8/encoder/quantize.h
index ad3a3fc..1375ed0 100644
--- a/vp8/encoder/quantize.h
+++ b/vp8/encoder/quantize.h
@@ -31,7 +31,9 @@
 #endif
 
 #if CONFIG_HYBRIDTRANSFORM
-extern prototype_quantize_block(vp8_ht_quantize_b_4x4);
+#define prototype_quantize_block_type(sym) \
+  void (sym)(BLOCK *b, BLOCKD *d, TX_TYPE type)
+extern prototype_quantize_block_type(vp8_ht_quantize_b_4x4);
 #endif
 
 #ifndef vp8_quantize_quantb_4x4
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 03a3fa4..7b5c8e1 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -618,40 +618,39 @@
       band = vp8_coef_bands;
       default_eob = 16;
 #if CONFIG_HYBRIDTRANSFORM
-      if (type == PLANE_TYPE_Y_WITH_DC &&
-          mb->q_index < ACTIVE_HT &&
-          mbmi->mode == B_PRED) {
-        tx_type = b->bmi.as_mode.tx_type;
-        switch (tx_type) {
-          case ADST_DCT:
-            scan = vp8_row_scan;
-            break;
+      if (type == PLANE_TYPE_Y_WITH_DC) {
+        tx_type = get_tx_type_4x4(xd, b);
+        if (tx_type != DCT_DCT) {
+          switch (tx_type) {
+            case ADST_DCT:
+              scan = vp8_row_scan;
+              break;
 
-          case DCT_ADST:
-            scan = vp8_col_scan;
-            break;
+            case DCT_ADST:
+              scan = vp8_col_scan;
+              break;
 
-          default:
-            scan = vp8_default_zig_zag1d;
-            break;
+            default:
+              scan = vp8_default_zig_zag1d;
+              break;
+          }
         }
-
       }
 #endif
+
       break;
     case TX_8X8:
       scan = vp8_default_zig_zag1d_8x8;
       band = vp8_coef_bands_8x8;
       default_eob = 64;
 #if CONFIG_HYBRIDTRANSFORM8X8
-      {
+      if (type == PLANE_TYPE_Y_WITH_DC) {
         BLOCKD *bb;
         int ib = (b - xd->block);
         if (ib < 16) {
           ib = (ib & 8) + ((ib & 4) >> 1);
           bb = xd->block + ib;
-          if (mbmi->mode == I8X8_PRED)
-            tx_type = bb->bmi.as_mode.tx_type;
+          tx_type = get_tx_type_8x8(xd, bb);
         }
       }
 #endif
@@ -661,10 +660,9 @@
       band = vp8_coef_bands_16x16;
       default_eob = 256;
 #if CONFIG_HYBRIDTRANSFORM16X16
-      if (type == PLANE_TYPE_Y_WITH_DC &&
-          mbmi->mode < I8X8_PRED &&
-          mb->q_index < ACTIVE_HT16)
-          tx_type = b->bmi.as_mode.tx_type;
+      if (type == PLANE_TYPE_Y_WITH_DC) {
+        tx_type = get_tx_type_16x16(xd, b);
+      }
 #endif
       break;
     default:
@@ -675,8 +673,6 @@
   else
     seg_eob = default_eob;
 
-  //mbmi->mode = mode;
-
   VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
 
 #if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16
@@ -871,6 +867,12 @@
 static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion,
                                   const VP8_ENCODER_RTCD *rtcd, int *skippable) {
   int d;
+  MACROBLOCKD *xd = &mb->e_mbd;
+  BLOCKD *b  = &mb->e_mbd.block[0];
+  BLOCK  *be = &mb->block[0];
+#if CONFIG_HYBRIDTRANSFORM16X16
+  TX_TYPE tx_type;
+#endif
 
   ENCODEMB_INVOKE(&rtcd->encodemb, submby)(
     mb->src_diff,
@@ -879,12 +881,9 @@
     mb->block[0].src_stride);
 
 #if CONFIG_HYBRIDTRANSFORM16X16
-  if ((mb->e_mbd.mode_info_context->mbmi.mode < I8X8_PRED) &&
-      (mb->q_index < ACTIVE_HT16)) {
-    BLOCKD *b  = &mb->e_mbd.block[0];
-    BLOCK  *be = &mb->block[0];
-    txfm_map(b, pred_mode_conv(mb->e_mbd.mode_info_context->mbmi.mode));
-    vp8_fht_c(be->src_diff, be->coeff, 32, b->bmi.as_mode.tx_type, 16);
+  tx_type = get_tx_type_16x16(xd, b);
+  if (tx_type != DCT_DCT) {
+    vp8_fht_c(be->src_diff, be->coeff, 32, tx_type, 16);
   } else
     vp8_transform_mby_16x16(mb);
 #else
@@ -1145,12 +1144,7 @@
                                      int *bestrate, int *bestratey,
                                      int *bestdistortion) {
   B_PREDICTION_MODE mode;
-
-#if CONFIG_HYBRIDTRANSFORM
-  int QIndex = x->q_index;
-  int active_ht = (QIndex < ACTIVE_HT);
-  TX_TYPE best_tx_type;
-#endif
+  MACROBLOCKD *xd = &x->e_mbd;
 
 #if CONFIG_COMP_INTRA_PRED
   B_PREDICTION_MODE mode2;
@@ -1161,6 +1155,10 @@
 
   ENTROPY_CONTEXT ta = *a, tempa = *a;
   ENTROPY_CONTEXT tl = *l, templ = *l;
+#if CONFIG_HYBRIDTRANSFORM
+  TX_TYPE tx_type = DCT_DCT;
+  TX_TYPE best_tx_type = DCT_DCT;
+#endif
   /*
    * The predictor buffer is a 2d buffer with a stride of 16.  Create
    * a temp buffer that meets the stride requirements, but we are only
@@ -1197,48 +1195,49 @@
 #endif
       ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), subb)(be, b, 16);
 
+      b->bmi.as_mode.first = mode;
 #if CONFIG_HYBRIDTRANSFORM
-      if (active_ht) {
-        txfm_map(b, mode);
-        vp8_fht_c(be->src_diff, be->coeff, 32, b->bmi.as_mode.tx_type, 4);
-        vp8_ht_quantize_b_4x4(be, b);
+      tx_type = get_tx_type_4x4(xd, b);
+      if (tx_type != DCT_DCT) {
+        vp8_fht_c(be->src_diff, be->coeff, 32, tx_type, 4);
+        vp8_ht_quantize_b_4x4(be, b, tx_type);
       } else {
         x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
         x->quantize_b_4x4(be, b);
       }
 #else
-        x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
-        x->quantize_b_4x4(be, b);
+      x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
+      x->quantize_b_4x4(be, b);
 #endif
 
-        tempa = ta;
-        templ = tl;
+      tempa = ta;
+      templ = tl;
 
-        ratey = cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, &tempa, &templ, TX_4X4);
-        rate += ratey;
-        distortion = ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), berr)(
-            be->coeff, b->dqcoeff, 16) >> 2;
+      ratey = cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, &tempa, &templ, TX_4X4);
+      rate += ratey;
+      distortion = ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), berr)(
+          be->coeff, b->dqcoeff, 16) >> 2;
 
-        this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
+      this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
 
-        if (this_rd < best_rd) {
-          *bestrate = rate;
-          *bestratey = ratey;
-          *bestdistortion = distortion;
-          best_rd = this_rd;
-          *best_mode = mode;
+      if (this_rd < best_rd) {
+        *bestrate = rate;
+        *bestratey = ratey;
+        *bestdistortion = distortion;
+        best_rd = this_rd;
+        *best_mode = mode;
 #if CONFIG_HYBRIDTRANSFORM
-          best_tx_type = b->bmi.as_mode.tx_type ;
+        best_tx_type = tx_type;
 #endif
 
 #if CONFIG_COMP_INTRA_PRED
-          *best_second_mode = mode2;
+        *best_second_mode = mode2;
 #endif
-          *a = tempa;
-          *l = templ;
-          copy_predictor(best_predictor, b->predictor);
-          vpx_memcpy(best_dqcoeff, b->dqcoeff, 32);
-        }
+        *a = tempa;
+        *l = templ;
+        copy_predictor(best_predictor, b->predictor);
+        vpx_memcpy(best_dqcoeff, b->dqcoeff, 32);
+      }
 #if CONFIG_COMP_INTRA_PRED
     }
 #endif
@@ -1249,16 +1248,15 @@
 #endif
 
 #if CONFIG_HYBRIDTRANSFORM
-  b->bmi.as_mode.tx_type = best_tx_type;
-
   // inverse transform
-  if (active_ht)
-    vp8_ihtllm_c(best_dqcoeff, b->diff, 32, b->bmi.as_mode.tx_type, 4);
+  if (best_tx_type != DCT_DCT)
+    vp8_ihtllm_c(best_dqcoeff, b->diff, 32, best_tx_type, 4);
   else
-    IDCT_INVOKE(IF_RTCD(&cpi->rtcd.common->idct), idct16)(best_dqcoeff,
-                                                                b->diff, 32);
+    IDCT_INVOKE(IF_RTCD(&cpi->rtcd.common->idct), idct16)(
+        best_dqcoeff, b->diff, 32);
 #else
-  IDCT_INVOKE(IF_RTCD(&cpi->rtcd.common->idct), idct16)(best_dqcoeff, b->diff, 32);
+  IDCT_INVOKE(IF_RTCD(&cpi->rtcd.common->idct), idct16)(
+      best_dqcoeff, b->diff, 32);
 #endif
 
   vp8_recon_b(best_predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
@@ -1413,9 +1411,6 @@
   int64_t this_rd;
   MACROBLOCKD *xd = &x->e_mbd;
 
-#if CONFIG_HYBRIDTRANSFORM16X16
-  int best_txtype, rd_txtype;
-#endif
 #if CONFIG_TX_SELECT
   int i;
   for (i = 0; i < NB_TXFM_MODES; i++)
@@ -1449,9 +1444,6 @@
 
       this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
 
-#if CONFIG_HYBRIDTRANSFORM16X16
-      rd_txtype = x->e_mbd.block[0].bmi.as_mode.tx_type;
-#endif
 
       if (this_rd < best_rd) {
         mode_selected = mode;
@@ -1463,9 +1455,6 @@
         *Rate = rate;
         *rate_y = ratey;
         *Distortion = distortion;
-#if CONFIG_HYBRIDTRANSFORM16X16
-        best_txtype = rd_txtype;
-#endif
         *skippable = skip;
       }
 
@@ -1486,9 +1475,6 @@
 
   mbmi->txfm_size = txfm_size;
   mbmi->mode = mode_selected;
-#if CONFIG_HYBRIDTRANSFORM16X16
-  x->e_mbd.block[0].bmi.as_mode.tx_type = best_txtype;
-#endif
 
 #if CONFIG_COMP_INTRA_PRED
   mbmi->second_mode = mode2_selected;
@@ -1539,6 +1525,7 @@
 
       // FIXME rate for compound mode and second intrapred mode
       rate = mode_costs[mode];
+      b->bmi.as_mode.first = mode;
 
 #if CONFIG_COMP_INTRA_PRED
       if (mode2 == (MB_PREDICTION_MODE)(DC_PRED - 1)) {
@@ -1555,10 +1542,11 @@
 
       if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) {
 #if CONFIG_HYBRIDTRANSFORM8X8
-        txfm_map(b, pred_mode_conv(mode));
-        vp8_fht_c(be->src_diff, (x->block + idx)->coeff, 32,
-                  b->bmi.as_mode.tx_type, 8);
-
+        TX_TYPE tx_type = get_tx_type_8x8(xd, b);
+        if (tx_type != DCT_DCT)
+          vp8_fht_c(be->src_diff, (x->block + idx)->coeff, 32, tx_type, 8);
+        else
+          x->vp8_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
 #else
         x->vp8_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
 #endif
@@ -3364,10 +3352,6 @@
   unsigned int ref_costs[MAX_REF_FRAMES];
   int_mv seg_mvs[BLOCK_MAX_SEGMENTS - 1][16 /* n_blocks */][MAX_REF_FRAMES - 1];
 
-#if CONFIG_HYBRIDTRANSFORM16X16
-  int best_txtype, rd_txtype;
-#endif
-
   vpx_memset(mode8x8, 0, sizeof(mode8x8));
   vpx_memset(&frame_mv, 0, sizeof(frame_mv));
   vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
@@ -3591,9 +3575,6 @@
           // FIXME compound intra prediction
           vp8_build_intra_predictors_mby(&x->e_mbd);
           macro_block_yrd(cpi, x, &rate_y, &distortion, &skippable, txfm_cache);
-#if CONFIG_HYBRIDTRANSFORM16X16
-          rd_txtype = x->e_mbd.block[0].bmi.as_mode.tx_type;
-#endif
           rate2 += rate_y;
           distortion2 += distortion;
           rate2 += x->mbmode_cost[x->e_mbd.frame_type][mbmi->mode];
@@ -4069,10 +4050,6 @@
           // Note index of best mode so far
           best_mode_index = mode_index;
 
-#if CONFIG_HYBRIDTRANSFORM16X16
-          best_txtype = rd_txtype;
-#endif
-
           if (this_mode <= B_PRED) {
             if (mbmi->txfm_size != TX_4X4
                 && this_mode != B_PRED
@@ -4245,11 +4222,6 @@
     }
   }
 
-#if CONFIG_HYBRIDTRANSFORM16X16
-  if (best_mbmode.mode < I8X8_PRED)
-    xd->mode_info_context->bmi[0].as_mode.tx_type = best_txtype;
-#endif
-
   if (best_mbmode.mode == I8X8_PRED)
     set_i8x8_block_modes(x, mode8x8);
 
@@ -4354,10 +4326,6 @@
   TX_SIZE txfm_size_16x16;
   int i;
 
-#if CONFIG_HYBRIDTRANSFORM16X16
-  int best_txtype;
-#endif
-
   mbmi->ref_frame = INTRA_FRAME;
   rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv,
                           &uv_intra_skippable);
@@ -4379,10 +4347,6 @@
                                           &rate16x16_tokenonly, &dist16x16,
                                           &y_intra16x16_skippable, txfm_cache);
   mode16x16 = mbmi->mode;
-#if CONFIG_HYBRIDTRANSFORM16X16
-  best_txtype = xd->block[0].bmi.as_mode.tx_type;
-  xd->mode_info_context->bmi[0].as_mode.tx_type = best_txtype;
-#endif
   txfm_size_16x16 = mbmi->txfm_size;
 
   // FIXME(rbultje) support transform-size selection
@@ -4452,10 +4416,6 @@
       mbmi->mode = mode16x16;
       rate = rate16x16 + rateuv8x8;
       dist = dist16x16 + (distuv8x8 >> 2);
-#if CONFIG_HYBRIDTRANSFORM16X16
-      // save this into supermacroblock coding decision buffer
-      xd->mode_info_context->bmi[0].as_mode.tx_type = best_txtype;
-#endif
 #if CONFIG_TX_SELECT
       for (i = 0; i < NB_TXFM_MODES; i++) {
         x->mb_context[xd->mb_index].txfm_rd_diff[i] = error16x16 - txfm_cache[i];
diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c
index c72c1e7..d46637a 100644
--- a/vp8/encoder/tokenize.c
+++ b/vp8/encoder/tokenize.c
@@ -171,6 +171,7 @@
 
     t->skip_eob_node = pt == 0 && ((band > 0 && type != PLANE_TYPE_Y_NO_DC) ||
                                    (band > 1 && type == PLANE_TYPE_Y_NO_DC));
+    assert(vp8_coef_encodings[t->Token].Len - t->skip_eob_node > 0);
     if (!dry_run) {
 #if CONFIG_HYBRIDTRANSFORM16X16
       if (tx_type != DCT_DCT)
@@ -310,8 +311,7 @@
   TOKENEXTRA *t = *tp;        /* store tokens starting here */
   const short *qcoeff_ptr = b->qcoeff;
 #if CONFIG_HYBRIDTRANSFORM8X8
-  TX_TYPE tx_type = xd->mode_info_context->mbmi.mode == I8X8_PRED ?
-      get_tx_type(xd, b) : DCT_DCT;
+  TX_TYPE tx_type = get_tx_type(xd, b);
 #endif
   const int eob = b->eob;
   int seg_eob = 64;
@@ -427,60 +427,49 @@
   }
 }
 
-#if CONFIG_HYBRIDTRANSFORM
-static void tokenize1st_order_ht_4x4(MACROBLOCKD *xd,
-                                     TOKENEXTRA **tp,
-                                     PLANE_TYPE type,
-                                     VP8_COMP *cpi,
-                                     int dry_run) {
+static void tokenize1st_order_b_4x4(MACROBLOCKD *xd,
+                                    TOKENEXTRA **tp,
+                                    PLANE_TYPE type,
+                                    VP8_COMP *cpi,
+                                    int dry_run) {
   unsigned int block;
   const BLOCKD *b = xd->block;
   int pt;             /* near block/prev token context index */
   TOKENEXTRA *t = *tp;/* store tokens starting here */
-  ENTROPY_CONTEXT * a;
-  ENTROPY_CONTEXT * l;
-  int const *pt_scan ;
+  ENTROPY_CONTEXT *a, *l;
   int seg_eob = 16;
   int segment_id = xd->mode_info_context->mbmi.segment_id;
+  int const *pt_scan = vp8_default_zig_zag1d;
 
-  if ( segfeature_active( xd, segment_id, SEG_LVL_EOB ) ) {
-    seg_eob = get_segdata( xd, segment_id, SEG_LVL_EOB );
+  if (segfeature_active(xd, segment_id, SEG_LVL_EOB)) {
+    seg_eob = get_segdata(xd, segment_id, SEG_LVL_EOB);
   }
 
   /* Luma */
   for (block = 0; block < 16; block++, b++) {
     const int eob = b->eob;
-    TX_TYPE tx_type = DCT_DCT;
-    const int tmp1 = vp8_block2above[block];
-    const int tmp2 = vp8_block2left[block];
     const int16_t *qcoeff_ptr = b->qcoeff;
     int c = (type == PLANE_TYPE_Y_NO_DC) ? 1 : 0;
 
-    a = (ENTROPY_CONTEXT *)xd->above_context + tmp1;
-    l = (ENTROPY_CONTEXT *)xd->left_context + tmp2;
+#if CONFIG_HYBRIDTRANSFORM
+    TX_TYPE tx_type = get_tx_type(xd, &xd->block[block]);
+    switch (tx_type) {
+      case ADST_DCT:
+        pt_scan = vp8_row_scan;
+        break;
+      case DCT_ADST:
+        pt_scan = vp8_col_scan;
+        break;
+      default :
+        pt_scan = vp8_default_zig_zag1d;
+        break;
+    }
+#endif
+    a = (ENTROPY_CONTEXT *)xd->above_context + vp8_block2above[block];
+    l = (ENTROPY_CONTEXT *)xd->left_context + vp8_block2left[block];
     VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
 
-    if( xd->mode_info_context->mbmi.mode == B_PRED ) {
-      tx_type = get_tx_type(xd, b);
-    }
-
-    // assign scanning order for luma components coded in intra4x4 mode
-    if ((xd->mode_info_context->mbmi.mode == B_PRED) &&
-        (type == PLANE_TYPE_Y_WITH_DC)) {
-      switch (tx_type) {
-        case ADST_DCT:
-          pt_scan = vp8_row_scan;
-          break;
-        case DCT_ADST:
-          pt_scan = vp8_col_scan;
-          break;
-        default :
-          pt_scan = vp8_default_zig_zag1d;
-          break;
-      }
-    } else {
-      pt_scan = vp8_default_zig_zag1d;
-    }
+    assert(b->eob <= 16);
 
     do {
       const int band = vp8_coef_bands[c];
@@ -496,84 +485,24 @@
         token = DCT_EOB_TOKEN;
 
       t->Token = token;
+#if CONFIG_HYBRIDTRANSFORM
       if (tx_type != DCT_DCT)
         t->context_tree = cpi->common.fc.hybrid_coef_probs[type][band][pt];
       else
+#endif
         t->context_tree = cpi->common.fc.coef_probs[type][band][pt];
 
       t->skip_eob_node = pt == 0 && ((band > 0 && type != PLANE_TYPE_Y_NO_DC) ||
                                      (band > 1 && type == PLANE_TYPE_Y_NO_DC));
       assert(vp8_coef_encodings[t->Token].Len - t->skip_eob_node > 0);
-
       if (!dry_run) {
+#if CONFIG_HYBRIDTRANSFORM
         if (tx_type != DCT_DCT)
           ++cpi->hybrid_coef_counts[type][band][pt][token];
         else
-          ++cpi->coef_counts       [type][band][pt][token];
-      }
-      pt = vp8_prev_token_class[token];
-      ++t;
-    } while (c < eob && ++c < seg_eob);
-
-    *tp = t;
-    pt = (c != !type); /* 0 <-> all coeff data is zero */
-    *a = *l = pt;
-  }
-
-  tokenize1st_order_chroma_4x4(xd, tp, cpi, dry_run);
-}
 #endif
-
-static void tokenize1st_order_b_4x4(MACROBLOCKD *xd,
-                                    TOKENEXTRA **tp,
-                                    PLANE_TYPE type,
-                                    VP8_COMP *cpi,
-                                    int dry_run) {
-  unsigned int block;
-  const BLOCKD *b = xd->block;
-  int pt;             /* near block/prev token context index */
-  TOKENEXTRA *t = *tp;/* store tokens starting here */
-  ENTROPY_CONTEXT *a, *l;
-  int seg_eob = 16;
-  int segment_id = xd->mode_info_context->mbmi.segment_id;
-
-  if (segfeature_active(xd, segment_id, SEG_LVL_EOB)) {
-    seg_eob = get_segdata(xd, segment_id, SEG_LVL_EOB);
-  }
-
-  /* Luma */
-  for (block = 0; block < 16; block++, b++) {
-    const int eob = b->eob;
-    const int16_t *qcoeff_ptr = b->qcoeff;
-    int c = (type == PLANE_TYPE_Y_NO_DC) ? 1 : 0;
-
-    a = (ENTROPY_CONTEXT *)xd->above_context + vp8_block2above[block];
-    l = (ENTROPY_CONTEXT *)xd->left_context + vp8_block2left[block];
-    VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
-
-    assert(b->eob <= 16);
-
-    do {
-      const int band = vp8_coef_bands[c];
-      int token;
-
-      if (c < eob) {
-        const int rc = vp8_default_zig_zag1d[c];
-        const int v = qcoeff_ptr[rc];
-
-        t->Extra = vp8_dct_value_tokens_ptr[v].Extra;
-        token    = vp8_dct_value_tokens_ptr[v].Token;
-      } else
-        token = DCT_EOB_TOKEN;
-
-      t->Token = token;
-      t->context_tree = cpi->common.fc.coef_probs[type][band][pt];
-
-      t->skip_eob_node = pt == 0 && ((band > 0 && type != PLANE_TYPE_Y_NO_DC) ||
-                                     (band > 1 && type == PLANE_TYPE_Y_NO_DC));
-      assert(vp8_coef_encodings[t->Token].Len - t->skip_eob_node > 0);
-      if (!dry_run)
-        ++cpi->coef_counts[type][band][pt][token];
+          ++cpi->coef_counts[type][band][pt][token];
+      }
       pt = vp8_prev_token_class[token];
       ++t;
     } while (c < eob && ++c < seg_eob);
@@ -674,12 +603,6 @@
   int skip_inc;
   int segment_id = xd->mode_info_context->mbmi.segment_id;
 
-#if CONFIG_HYBRIDTRANSFORM
-    int QIndex = cpi->mb.q_index;
-    int active_ht = (QIndex < ACTIVE_HT) &&
-                    (xd->mode_info_context->mbmi.mode == B_PRED);
-#endif
-
   if (!segfeature_active(xd, segment_id, SEG_LVL_EOB) ||
       (get_segdata(xd, segment_id, SEG_LVL_EOB) != 0)) {
     skip_inc = 1;
@@ -784,12 +707,7 @@
       }
     }
   } else {
-#if CONFIG_HYBRIDTRANSFORM
-    if (active_ht)
-      tokenize1st_order_ht_4x4(xd, t, plane_type, cpi, dry_run);
-    else
-#endif
-      tokenize1st_order_b_4x4(xd, t, plane_type, cpi, dry_run);
+    tokenize1st_order_b_4x4(xd, t, plane_type, cpi, dry_run);
   }
   if (dry_run)
     *t = t_backup;
@@ -1078,8 +996,7 @@
   int pt; /* near block/prev token context index */
   TOKENEXTRA *t = *tp;        /* store tokens starting here */
 #if CONFIG_HYBRIDTRANSFORM8X8
-  TX_TYPE tx_type = xd->mode_info_context->mbmi.mode == I8X8_PRED ?
-      get_tx_type(xd, b) : DCT_DCT;
+  TX_TYPE tx_type = get_tx_type(xd, b);
 #endif
   const int band = vp8_coef_bands_8x8[(type == PLANE_TYPE_Y_NO_DC) ? 1 : 0];
   VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);