diff --git a/configure b/configure
index 202778d66e52ab49adc9f07b8f211f4c9170c971..a24a1e10fdfb38edd724686f51d232992ac2a2e4 100755
--- a/configure
+++ b/configure
@@ -228,6 +228,7 @@ EXPERIMENT_LIST="
     adaptive_entropy
     pred_filter
     lossless
+    hybridtransform
 "
 CONFIG_LIST="
     external_build
diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index 3c60b10ae47e5f21b2aa32bb44967a8808692851..758977d6d778a9bfe4b23b3e5a7108cdc2ffbe33 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -124,12 +124,25 @@ typedef enum {
 
 } TX_SIZE;
 
+#if CONFIG_HYBRIDTRANSFORM
+typedef enum {
+  DCT_DCT   = 0,                      // DCT  in both horizontal and vertical
+  ADST_DCT  = 1,                      // ADST in vertical, DCT in horizontal
+  DCT_ADST  = 2,                      // DCT  in vertical, ADST in horizontal
+  ADST_ADST = 3                       // ADST in both directions
+} TX_TYPE;
+#endif
+
 #define VP8_YMODES  (B_PRED + 1)
 #define VP8_UV_MODES (TM_PRED + 1)
 #define VP8_I8X8_MODES (TM_PRED + 1)
 
 #define VP8_MVREFS (1 + SPLITMV - NEARESTMV)
 
+#if CONFIG_HYBRIDTRANSFORM
+#define ACTIVE_HT 110                // hybrid transform is used only when q_index < ACTIVE_HT
+#endif
+
 typedef enum {
   B_DC_PRED,          /* average of above and left pixels */
   B_TM_PRED,
@@ -163,6 +176,11 @@ typedef enum {
 union b_mode_info {
   struct {
     B_PREDICTION_MODE first;
+#if CONFIG_HYBRIDTRANSFORM
+    B_PREDICTION_MODE test;
+    TX_TYPE           tx_type;
+#endif
+
 #if CONFIG_COMP_INTRA_PRED
     B_PREDICTION_MODE second;
 #endif
@@ -183,6 +201,10 @@ typedef enum {
 
 typedef struct {
   MB_PREDICTION_MODE mode, uv_mode;
+#if CONFIG_HYBRIDTRANSFORM
+  MB_PREDICTION_MODE mode_rdopt;
+#endif
+
 #if CONFIG_COMP_INTRA_PRED
   MB_PREDICTION_MODE second_mode, second_uv_mode;
 #endif
@@ -345,6 +367,10 @@ typedef struct MacroBlockD {
 
   int mb_index;   // Index of the MB in the SB (0..3)
 
+#if CONFIG_HYBRIDTRANSFORM
+  int q_index;
+#endif
+
 } MACROBLOCKD;
 
 
diff --git a/vp8/common/entropy.c b/vp8/common/entropy.c
index 4c7caef8202a88f65beddf80a17e1997ed23caa2..397e4aea3f3deb814de141ca5e1a0e4715579d4b 100644
--- a/vp8/common/entropy.c
+++ b/vp8/common/entropy.c
@@ -65,6 +65,24 @@ DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]) = {
   9, 12, 13, 10,
   7, 11, 14, 15,
 };
+
+// 4x4 scan orders for the hybrid transform: scan position -> raster index.
+#if CONFIG_HYBRIDTRANSFORM
+DECLARE_ALIGNED(16, const int, vp8_col_scan[16]) = {  // column by column
+  0, 4,  8, 12,
+  1, 5,  9, 13,
+  2, 6, 10, 14,
+  3, 7, 11, 15
+};
+DECLARE_ALIGNED(16, const int, vp8_row_scan[16]) = {  // row by row
+  0,   1,  2,  3,
+  4,   5,  6,  7,
+  8,   9, 10, 11,
+  12, 13, 14, 15
+};
+#endif
+
+
 DECLARE_ALIGNED(64, cuchar, vp8_coef_bands_8x8[64]) = { 0, 1, 2, 3, 5, 4, 4, 5,
                                                         5, 3, 6, 3, 5, 4, 6, 6,
                                                         6, 5, 5, 6, 6, 6, 6, 6,
diff --git a/vp8/common/entropy.h b/vp8/common/entropy.h
index bc6f16c5fa80fb895513d095586db53abbe30ea0..aa1faca43261eb70e796d655550d049b9b1ad551 100644
--- a/vp8/common/entropy.h
+++ b/vp8/common/entropy.h
@@ -107,6 +107,12 @@ extern DECLARE_ALIGNED(16, const unsigned char, vp8_prev_token_class[MAX_ENTROPY
 struct VP8Common;
 void vp8_default_coef_probs(struct VP8Common *);
 extern DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]);
+
+#if CONFIG_HYBRIDTRANSFORM
+extern DECLARE_ALIGNED(16, const int, vp8_col_scan[16]);
+extern DECLARE_ALIGNED(16, const int, vp8_row_scan[16]);
+#endif
+
 extern short vp8_default_zig_zag_mask[16];
 extern DECLARE_ALIGNED(64, const int, vp8_default_zig_zag1d_8x8[64]);
 extern short vp8_default_zig_zag_mask_8x8[64];// int64_t
diff --git a/vp8/common/idct.h b/vp8/common/idct.h
index 433f577454fbcdb522c5c58d9c51c6ee96134bc2..7582a1c8a3af58b66dfbc6cba68a9aac78eac403 100644
--- a/vp8/common/idct.h
+++ b/vp8/common/idct.h
@@ -97,6 +97,12 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_lossless_c);
 extern prototype_second_order(vp8_short_inv_walsh4x4_1_lossless_c);
 #endif
 
+#if CONFIG_HYBRIDTRANSFORM
+#include "vp8/common/blockd.h"
+void vp8_iht4x4llm_c(short *input, short *output, int pitch, TX_TYPE tx_type);
+#endif
+
+
 typedef prototype_idct((*vp8_idct_fn_t));
 typedef prototype_idct_scalar_add((*vp8_idct_scalar_add_fn_t));
 typedef prototype_second_order((*vp8_second_order_fn_t));
diff --git a/vp8/common/idctllm.c b/vp8/common/idctllm.c
index 684868cb2afab00506078da440097297e0d3936b..dbf0fda8278308764a3411ae7125442e81f6a680 100644
--- a/vp8/common/idctllm.c
+++ b/vp8/common/idctllm.c
@@ -25,6 +25,9 @@
 #include "vpx_ports/config.h"
 #include "vp8/common/idct.h"
 
+#if CONFIG_HYBRIDTRANSFORM
+#include "vp8/common/blockd.h"
+#endif
 
 #include <math.h>
 
@@ -32,6 +35,130 @@ static const int cospi8sqrt2minus1 = 20091;
 static const int sinpi8sqrt2      = 35468;
 static const int rounding = 0;
 
+#if CONFIG_HYBRIDTRANSFORM
+float idct_4[16] = {  // 4x4 inverse DCT matrix (row-major), used by vp8_iht4x4llm_c
+  0.500000000000000,   0.653281482438188,   0.500000000000000,   0.270598050073099,
+  0.500000000000000,   0.270598050073099,  -0.500000000000000,  -0.653281482438188,
+  0.500000000000000,  -0.270598050073099,  -0.500000000000000,   0.653281482438188,
+  0.500000000000000,  -0.653281482438188,   0.500000000000000,  -0.270598050073099
+};
+
+float iadst_4[16] = {  // 4x4 inverse ADST matrix (row-major), used by vp8_iht4x4llm_c
+  0.228013428883779,   0.577350269189626,   0.656538502008139,   0.428525073124360,
+  0.428525073124360,   0.577350269189626,  -0.228013428883779,  -0.656538502008139,
+  0.577350269189626,                   0,  -0.577350269189626,   0.577350269189626,
+  0.656538502008139,  -0.577350269189626,   0.428525073124359,  -0.228013428883779
+};
+#endif
+
+#if CONFIG_HYBRIDTRANSFORM  // inverse 4x4 hybrid transform, floating-point implementation
+void vp8_iht4x4llm_c(short *input, short *output, int pitch, TX_TYPE tx_type) {
+  int i, j, k;
+  float bufa[16], bufb[16]; // buffers are for floating-point test purpose
+                            // the implementation could be simplified in conjunction with integer transform
+  short *ip = input;
+  short *op = output;
+  int shortpitch = pitch >> 1;  // output row stride in shorts: half of pitch
+
+  float *pfa = &bufa[0];
+  float *pfb = &bufb[0];
+
+  // pointers to vertical and horizontal transforms
+  float *ptv, *pth;
+
+  // load the 16 input coefficients (4 contiguous rows of 4) into bufa as floats
+  for(j = 0; j < 4; j++) {
+    for(i = 0; i < 4; i++) {
+      pfa[i] = (float)ip[i];
+    }
+    pfa += 4;
+    ip  += 4;
+  }
+
+  // vertical transformation: ADST for ADST_ADST / ADST_DCT, DCT otherwise
+  pfa = &bufa[0];
+  pfb = &bufb[0];
+
+  switch(tx_type) {
+    case ADST_ADST :
+    case ADST_DCT  :
+      ptv = &iadst_4[0];
+      break;
+
+    default :
+      ptv = &idct_4[0];
+      break;
+  }
+
+  // bufb[j][i] = row j of the vertical matrix dotted with column i of bufa
+  for(j = 0; j < 4; j++) {
+    for(i = 0; i < 4; i++) {
+      pfb[i] = 0 ;
+      for(k = 0; k < 4; k++) {
+        pfb[i] += ptv[k] * pfa[(k<<2)];
+      }
+      pfa += 1;  // next column of bufa
+    }
+
+    pfb += 4;
+    ptv += 4;  // next row of the transform matrix
+    pfa = &bufa[0];
+  }
+
+  // horizontal transformation: ADST for ADST_ADST / DCT_ADST, DCT otherwise
+  pfa = &bufa[0];
+  pfb = &bufb[0];
+
+  switch(tx_type) {
+    case ADST_ADST :
+    case  DCT_ADST :
+      pth = &iadst_4[0];
+      break;
+
+    default :
+      pth = &idct_4[0];
+      break;
+  }
+
+  // bufa[j][i] = row j of bufb dotted with row i of the horizontal matrix
+  for(j = 0; j < 4; j++) {
+    for(i = 0; i < 4; i++) {
+      pfa[i] = 0;
+      for(k = 0; k < 4; k++) {
+        pfa[i] += pfb[k] * pth[k];
+      }
+      pth += 4;
+     }
+
+    pfa += 4;
+    pfb += 4;
+    // rewind pth to the first row of the horizontal matrix for the next row
+    switch(tx_type) {
+      case ADST_ADST :
+      case  DCT_ADST :
+        pth = &iadst_4[0];
+        break;
+
+      default :
+        pth = &idct_4[0];
+        break;
+    }
+  }
+
+  // scale by 1/8, round with a 0.49 offset (mirrored for negative values)
+  // and store as shorts, advancing shortpitch elements per output row
+  op  = output;
+  pfa = &bufa[0];
+
+  for(j = 0; j < 4; j++) {
+    for(i = 0; i < 4; i++) {
+      op[i] = (pfa[i] > 0 ) ? (short)( pfa[i] / 8 + 0.49) :
+                             -(short)( - pfa[i] / 8 + 0.49);
+    }
+    op  += shortpitch;
+    pfa += 4;
+  }
+}
+#endif
+
+
 void vp8_short_idct4x4llm_c(short *input, short *output, int pitch) {
   int i;
   int a1, b1, c1, d1;
diff --git a/vp8/common/invtrans.c b/vp8/common/invtrans.c
index aea4536480bac80439a4b84566a1e8807758c7ad..d35071600bcd6602437e4fbc6c782fa2250a201c 100644
--- a/vp8/common/invtrans.c
+++ b/vp8/common/invtrans.c
@@ -31,6 +31,11 @@ static void recon_dcblock_8x8(MACROBLOCKD *x) {
 
 }
 
+#if CONFIG_HYBRIDTRANSFORM  // inverse hybrid transform of one block: b->dqcoeff -> b->diff
+void vp8_inverse_htransform_b(const vp8_idct_rtcd_vtable_t *rtcd, BLOCKD *b, int pitch) {
+  vp8_iht4x4llm_c(b->dqcoeff, b->diff, pitch, b->bmi.as_mode.tx_type);  // rtcd is not used
+}
+#endif
 
 void vp8_inverse_transform_b(const vp8_idct_rtcd_vtable_t *rtcd, BLOCKD *b, int pitch) {
   if (b->eob <= 1)
diff --git a/vp8/common/invtrans.h b/vp8/common/invtrans.h
index 4c4f0d3d26dee69e59bf5772a90abf84071d2dad..1eda173b4b8f0f66bf440a02d4a8552109ff53ac 100644
--- a/vp8/common/invtrans.h
+++ b/vp8/common/invtrans.h
@@ -15,6 +15,11 @@
 #include "vpx_ports/config.h"
 #include "idct.h"
 #include "blockd.h"
+
+#if CONFIG_HYBRIDTRANSFORM
+extern void vp8_inverse_htransform_b(const vp8_idct_rtcd_vtable_t *rtcd, BLOCKD *b, int pitch);
+#endif
+
 extern void vp8_inverse_transform_b(const vp8_idct_rtcd_vtable_t *rtcd, BLOCKD *b, int pitch);
 extern void vp8_inverse_transform_mb(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
 extern void vp8_inverse_transform_mby(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 9eb42cc2e63de892c4bcaa090a53bb9b20a2a18f..d0e43ca9b375d1a45ff8dfe9c68e56b0a690ee41 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -128,6 +128,11 @@ void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd) {
     xd->block[i].dequant = pc->Y1dequant[QIndex];
   }
 
+#if CONFIG_HYBRIDTRANSFORM
+  xd->q_index = QIndex;
+#endif
+
+
 #if CONFIG_LOSSLESS
   if (!QIndex) {
     pbi->common.rtcd.idct.idct1        = vp8_short_inv_walsh4x4_1_x8_c;
@@ -208,6 +213,11 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
   int i;
   int tx_type;
 
+#if CONFIG_HYBRIDTRANSFORM
+  int QIndex = xd->q_index;
+  int active_ht = (QIndex < ACTIVE_HT);
+#endif
+
   if (pbi->common.frame_type == KEY_FRAME) {
     if (pbi->common.txfm_mode == ALLOW_8X8 &&
         xd->mode_info_context->mbmi.mode != I8X8_PRED &&
@@ -281,6 +291,39 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
   if (xd->segmentation_enabled)
     mb_init_dequantizer(pbi, xd);
 
+#if CONFIG_HYBRIDTRANSFORM
+  // parse transform types for intra 4x4 mode
+  if (mode == B_PRED) {
+    for (i = 0; i < 16; i++) {
+      BLOCKD *b = &xd->block[i];
+      int b_mode = xd->mode_info_context->bmi[i].as_mode.first;
+      if(active_ht) {
+        switch(b_mode) {
+          case B_TM_PRED :
+          case B_RD_PRED :
+            b->bmi.as_mode.tx_type = ADST_ADST;
+            break;
+
+          case B_VE_PRED :
+          case B_VR_PRED :
+            b->bmi.as_mode.tx_type = ADST_DCT;
+            break ;
+
+          case B_HE_PRED :
+          case B_HD_PRED :
+          case B_HU_PRED :
+            b->bmi.as_mode.tx_type = DCT_ADST;
+            break;
+
+          default :
+            b->bmi.as_mode.tx_type = DCT_DCT;
+            break;
+        }
+      }
+    } // loop over 4x4 blocks
+  }
+#endif
+
   /* do prediction */
   if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
     if (mode != I8X8_PRED) {
@@ -360,16 +403,29 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
       }
 #endif
 
-      if (xd->eobs[i] > 1) {
-        DEQUANT_INVOKE(&pbi->dequant, idct_add)
-        (b->qcoeff, b->dequant,  b->predictor,
-         *(b->base_dst) + b->dst, 16, b->dst_stride);
-      } else {
-        IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
-        (b->qcoeff[0] * b->dequant[0], b->predictor,
-         *(b->base_dst) + b->dst, 16, b->dst_stride);
-        ((int *)b->qcoeff)[0] = 0;
+#if CONFIG_HYBRIDTRANSFORM
+      if(active_ht)
+        vp8_ht_dequant_idct_add_c( (TX_TYPE)b->bmi.as_mode.tx_type, b->qcoeff,
+                                   b->dequant, b->predictor,
+                                   *(b->base_dst) + b->dst, 16, b->dst_stride);
+      else
+        vp8_dequant_idct_add_c(b->qcoeff, b->dequant, b->predictor,
+                               *(b->base_dst) + b->dst, 16, b->dst_stride);
+#else
+      if (xd->eobs[i] > 1)
+      {
+          DEQUANT_INVOKE(&pbi->dequant, idct_add)
+              (b->qcoeff, b->dequant,  b->predictor,
+              *(b->base_dst) + b->dst, 16, b->dst_stride);
+      }
+      else
+      {
+          IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
+              (b->qcoeff[0] * b->dequant[0], b->predictor,
+              *(b->base_dst) + b->dst, 16, b->dst_stride);
+          ((int *)b->qcoeff)[0] = 0;
       }
+#endif
     }
   } else if (mode == SPLITMV) {
     DEQUANT_INVOKE(&pbi->dequant, idct_add_y_block)
@@ -378,8 +434,6 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
      xd->dst.y_stride, xd->eobs);
   } else {
     BLOCKD *b = &xd->block[24];
-
-
     if (tx_type == TX_8X8) {
       DEQUANT_INVOKE(&pbi->dequant, block_2x2)(b);
 #ifdef DEC_DEBUG
diff --git a/vp8/decoder/dequantize.c b/vp8/decoder/dequantize.c
index 73859b0d759f5b957a45c2b41a9ff44441b4ee9e..3669cc21493b952f43d3311cc811596c255f64b3 100644
--- a/vp8/decoder/dequantize.c
+++ b/vp8/decoder/dequantize.c
@@ -41,6 +41,44 @@ void vp8_dequantize_b_c(BLOCKD *d) {
   }
 }
 
+
+#if CONFIG_HYBRIDTRANSFORM
+void vp8_ht_dequant_idct_add_c(TX_TYPE tx_type, short *input, short *dq,
+                               unsigned char *pred, unsigned char *dest,
+                               int pitch, int stride) {
+  // Adds the inverse-transformed residual to pred and writes the pixels,
+  // clamped to [0, 255], to dest; the coefficients in input are zeroed.
+  short residual[16];
+  const short *res_row = residual;
+  int row, col, idx;
+
+  // dequantize in place
+  for (idx = 0; idx < 16; idx++)
+    input[idx] = dq[idx] * input[idx];
+
+  // a pitch of 8 makes the transform write rows 4 shorts apart
+  vp8_iht4x4llm_c(input, residual, 4 << 1, tx_type);
+
+  vpx_memset(input, 0, 32);
+
+  for (row = 0; row < 4; row++) {
+    for (col = 0; col < 4; col++) {
+      int pixel = res_row[col] + pred[col];
+      // saturate to the 8-bit pixel range
+      if (pixel < 0)
+        pixel = 0;
+      else if (pixel > 255)
+        pixel = 255;
+      dest[col] = (unsigned char) pixel;
+    }
+
+    dest += stride;
+    res_row += 4;
+    pred += pitch;
+  }
+}
+#endif
+
 void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *pred,
                             unsigned char *dest, int pitch, int stride) {
   short output[16];
diff --git a/vp8/decoder/dequantize.h b/vp8/decoder/dequantize.h
index 2582d8626e546650218049ef5eaede77fa5011aa..76418fffa3293cb12de33d3ab11bef6955420371 100644
--- a/vp8/decoder/dequantize.h
+++ b/vp8/decoder/dequantize.h
@@ -76,6 +76,17 @@ extern prototype_dequant_block(vp8_dequant_block);
 #endif
 extern prototype_dequant_idct_add(vp8_dequant_idct_add);
 
+#if CONFIG_HYBRIDTRANSFORM
+// declare dequantization and inverse transform module of hybrid transform decoder
+#ifndef vp8_ht_dequant_idct_add
+#define vp8_ht_dequant_idct_add vp8_ht_dequant_idct_add_c
+#endif
+extern void vp8_ht_dequant_idct_add(TX_TYPE tx_type, short *input, short *dq,
+                                    unsigned char *pred, unsigned char *dest,
+                                    int pitch, int stride);
+
+#endif
+
 #ifndef vp8_dequant_dc_idct_add
 #define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_add_c
 #endif
diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c
index 7f3469efb7811e5b35ef30517b3ec648e02ead93..a11308767bff3ada192b445cbabe32e0fdeee977 100644
--- a/vp8/decoder/detokenize.c
+++ b/vp8/decoder/detokenize.c
@@ -119,6 +119,53 @@ int get_token(int v) {
   else return DCT_VAL_CATEGORY6;
 }
 
+#if CONFIG_HYBRIDTRANSFORM
+// Tally the coefficient tokens of one 4x4 block into fc->coef_counts, reading
+// the coefficients in the scan order that matches the block's transform type.
+// eob is the number of scan positions holding coded coefficients.
+void static count_tokens_adaptive_scan(const MACROBLOCKD *xd, INT16 *qcoeff_ptr,
+                                       int block, int type, ENTROPY_CONTEXT *a,
+                                       ENTROPY_CONTEXT *l, int eob, int seg_eob,
+                                       FRAME_CONTEXT *fc) {
+  int c, pt;
+
+  // zig-zag is the default; a directional scan is chosen only for B_PRED
+  // macroblocks whose quantizer index enables the hybrid transform
+  const int *scan = vp8_default_zig_zag1d;
+
+  if (xd->q_index < ACTIVE_HT &&
+      xd->mode_info_context->mbmi.mode == B_PRED) {
+    const TX_TYPE tx_type = xd->block[block].bmi.as_mode.tx_type;
+
+    // ADST_ADST and DCT_DCT keep the zig-zag scan
+    if (tx_type == ADST_DCT)
+      scan = vp8_row_scan;
+    else if (tx_type == DCT_ADST)
+      scan = vp8_col_scan;
+  }
+
+  // seed the token context pt from *a and *l
+  VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
+
+  // the walk starts at position 1 when type is 0, otherwise at position 0
+  for (c = !type; c < eob; ++c) {
+    const int token = get_token(qcoeff_ptr[scan[c]]);
+    const int band = vp8_coef_bands[c];
+
+    fc->coef_counts[type][band][pt][token]++;
+    // the token just counted selects the context for the next one
+    pt = vp8_prev_token_class[token];
+  }
+
+  // count an end-of-block token when the block stops before seg_eob
+  if (eob < seg_eob) {
+    const int band = vp8_coef_bands[c];
+
+    fc->coef_counts[type][band][pt][DCT_EOB_TOKEN]++;
+  }
+}
+#endif
+
 void static count_tokens(INT16 *qcoeff_ptr, int block, int type,
                          ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                          int eob, int seg_eob, FRAME_CONTEXT *const fc) {
@@ -289,8 +336,14 @@ static int vp8_decode_coefs(VP8D_COMP *dx, const MACROBLOCKD *xd,
     WRITE_COEF_CONTINUE(val);
   }
 #if CONFIG_ADAPTIVE_ENTROPY
+
   if (block_type == TX_4X4)
+#if CONFIG_HYBRIDTRANSFORM
+    count_tokens_adaptive_scan(xd, qcoeff_ptr, i, type, a, l, c, seg_eob, fc);
+#else
     count_tokens(qcoeff_ptr, i, type, a, l, c, seg_eob, fc);
+#endif
+
   else
     count_tokens_8x8(qcoeff_ptr, i, type, a, l, c, seg_eob, fc);
 #endif
@@ -351,12 +404,21 @@ int vp8_decode_mb_tokens_8x8(VP8D_COMP *pbi, MACROBLOCKD *xd) {
   return eobtotal;
 }
 
+
 int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *xd) {
   ENTROPY_CONTEXT *const A = (ENTROPY_CONTEXT *)xd->above_context;
   ENTROPY_CONTEXT *const L = (ENTROPY_CONTEXT *)xd->left_context;
 
   char *const eobs = xd->eobs;
+#if CONFIG_HYBRIDTRANSFORM
+  const int *scan = vp8_default_zig_zag1d;
+  int QIndex = xd->q_index;
+  int active_ht = (QIndex < ACTIVE_HT) &&
+                  (xd->mode_info_context->mbmi.mode == B_PRED);
+#else
   const int *const scan = vp8_default_zig_zag1d;
+#endif
+
   int c, i, type, eobtotal = 0, seg_eob = 16;
   INT16 *qcoeff_ptr = &xd->qcoeff[0];
 
@@ -388,6 +450,41 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *xd) {
     if (i == 16)
       type = PLANE_TYPE_UV;
 
+#if CONFIG_HYBRIDTRANSFORM
+    if (type == PLANE_TYPE_Y_WITH_DC &&
+        xd->mode_info_context->mbmi.mode == B_PRED &&
+        active_ht) {
+      BLOCKD *b = &xd->block[i];
+      switch(b->bmi.as_mode.first) {
+        case B_TM_PRED :
+        case B_RD_PRED :
+          b->bmi.as_mode.tx_type = ADST_ADST;
+          scan = vp8_default_zig_zag1d;
+          break;
+
+        case B_VE_PRED :
+        case B_VR_PRED :
+          b->bmi.as_mode.tx_type = ADST_DCT;
+          scan = vp8_row_scan;
+          break ;
+
+        case B_HE_PRED :
+        case B_HD_PRED :
+        case B_HU_PRED :
+          b->bmi.as_mode.tx_type = DCT_ADST;
+          scan = vp8_col_scan;
+          break;
+
+        default :
+          b->bmi.as_mode.tx_type = DCT_DCT;
+          scan = vp8_default_zig_zag1d;
+          break;
+      }
+    }
+    if (type == PLANE_TYPE_UV) {
+      scan = vp8_default_zig_zag1d;
+    }
+#endif
     c = vp8_decode_coefs(dx, xd, a, l, type, seg_eob, qcoeff_ptr,
                          i, scan, TX_4X4, coef_bands_x);
     a[0] = l[0] = ((eobs[i] = c) != !type);
diff --git a/vp8/encoder/dct.c b/vp8/encoder/dct.c
index fbe22eb107f3985f1f9d61802efdaac6534f3a44..c3a938b6222445d88cd5388dfa6b745f3696cc98 100644
--- a/vp8/encoder/dct.c
+++ b/vp8/encoder/dct.c
@@ -13,6 +13,28 @@
 #include "vpx_ports/config.h"
 #include "vp8/common/idct.h"
 
+#if CONFIG_HYBRIDTRANSFORM
+
+#include "vp8/common/blockd.h"
+
+float dct_4[16] = {  // 4x4 forward DCT matrix (row-major), used by vp8_fht4x4_c
+  0.500000000000000,  0.500000000000000,  0.500000000000000,  0.500000000000000,
+  0.653281482438188,  0.270598050073099, -0.270598050073099, -0.653281482438188,
+  0.500000000000000, -0.500000000000000, -0.500000000000000,  0.500000000000000,
+  0.270598050073099, -0.653281482438188,  0.653281482438188, -0.270598050073099
+};
+
+float adst_4[16] = {  // 4x4 forward ADST matrix (row-major), used by vp8_fht4x4_c
+  0.228013428883779,  0.428525073124360,  0.577350269189626,  0.656538502008139,
+  0.577350269189626,  0.577350269189626,  0.000000000000000, -0.577350269189626,
+  0.656538502008139, -0.228013428883779, -0.577350269189626,  0.428525073124359,
+  0.428525073124360, -0.656538502008139,  0.577350269189626, -0.228013428883779
+};
+#endif
+
+
+#if CONFIG_INT_8X8FDCT
+
 static const int xC1S7 = 16069;
 static const int xC2S6 = 15137;
 static const int xC3S5 = 13623;
@@ -268,6 +290,112 @@ void vp8_short_fhaar2x2_c(short *input, short *output, int pitch) { // pitch = 8
 
 }
 
+#if CONFIG_HYBRIDTRANSFORM  // forward 4x4 hybrid transform, floating-point implementation
+void vp8_fht4x4_c(short *input, short *output, int pitch, TX_TYPE tx_type) {
+  int i, j, k;
+  float bufa[16], bufb[16]; // buffers are for floating-point test purpose
+                             // the implementation could be simplified in
+                             // conjunction with integer transform
+  short *ip = input;
+  short *op = output;
+
+  float *pfa = &bufa[0];
+  float *pfb = &bufb[0];
+
+  // pointers to vertical and horizontal transforms
+  float *ptv, *pth;
+
+  // load the 4x4 residual into bufa as floats; input rows are pitch / 2 shorts apart
+  for(j = 0; j < 4; j++) {
+    for(i = 0; i < 4; i++) {
+      pfa[i] = (float)ip[i];
+    }
+    pfa += 4;
+    ip  += pitch / 2;
+  }
+
+  // vertical transformation: ADST for ADST_ADST / ADST_DCT, DCT otherwise
+  pfa = &bufa[0];
+  pfb = &bufb[0];
+
+  switch(tx_type) {
+    case ADST_ADST :
+    case ADST_DCT  :
+      ptv = &adst_4[0];
+      break;
+
+    default :
+      ptv = &dct_4[0];
+      break;
+  }
+
+  // bufb[j][i] = row j of the vertical matrix dotted with column i of bufa
+  for(j = 0; j < 4; j++) {
+    for(i = 0; i < 4; i++) {
+      pfb[i] = 0;
+      for(k = 0; k < 4; k++) {
+        pfb[i] += ptv[k] * pfa[(k<<2)];
+      }
+      pfa += 1;  // next column of bufa
+    }
+    pfb += 4;
+    ptv += 4;  // next row of the transform matrix
+    pfa = &bufa[0];
+  }
+
+  // horizontal transformation: ADST for ADST_ADST / DCT_ADST, DCT otherwise
+  pfa = &bufa[0];
+  pfb = &bufb[0];
+
+  switch(tx_type) {
+    case ADST_ADST :
+    case  DCT_ADST :
+      pth = &adst_4[0];
+      break;
+
+    default :
+      pth = &dct_4[0];
+      break;
+  }
+
+  // bufa[j][i] = row j of bufb dotted with row i of the horizontal matrix
+  for(j = 0; j < 4; j++) {
+    for(i = 0; i < 4; i++) {
+      pfa[i] = 0;
+      for(k = 0; k < 4; k++) {
+        pfa[i] += pfb[k] * pth[k];
+      }
+      pth += 4;
+     }
+
+    pfa += 4;
+    pfb += 4;
+    // rewind pth to the first row of the horizontal matrix for the next row
+    switch(tx_type) {
+      case ADST_ADST :
+      case  DCT_ADST :
+        pth = &adst_4[0];
+        break;
+
+      default :
+        pth = &dct_4[0];
+        break;
+    }
+  }
+
+  // scale by 8, round with a 0.49 offset (mirrored for negative values)
+  // and store as 16 contiguous shorts
+  op  = output ;
+  pfa = &bufa[0] ;
+
+  for(j = 0; j < 4; j++) {
+    for(i = 0; i < 4; i++) {
+      op[i] = (pfa[i] > 0 ) ? (short)( 8 * pfa[i] + 0.49) :
+                                   -(short)(- 8 * pfa[i] + 0.49);
+    }
+    op  += 4;
+    pfa += 4;
+  }
+}
+#endif
+
 void vp8_short_fdct4x4_c(short *input, short *output, int pitch) {
   int i;
   int a1, b1, c1, d1;
@@ -309,9 +437,18 @@ void vp8_short_fdct4x4_c(short *input, short *output, int pitch) {
   }
 }
 
-void vp8_short_fdct8x4_c(short *input, short *output, int pitch) {
-  vp8_short_fdct4x4_c(input,   output,    pitch);
-  vp8_short_fdct4x4_c(input + 4, output + 16, pitch);
+#if CONFIG_HYBRIDTRANSFORM  // forward hybrid transform of two horizontally adjacent 4x4 blocks
+void vp8_fht8x4_c(short *input, short *output, int pitch,
+                  TX_TYPE tx_type) {
+  vp8_fht4x4_c(input,     output,      pitch, tx_type);  // left block  -> output[0..15]
+  vp8_fht4x4_c(input + 4, output + 16, pitch, tx_type);  // right block -> output[16..31]
+}
+#endif
+
+void vp8_short_fdct8x4_c(short *input, short *output, int pitch)
+{
+    vp8_short_fdct4x4_c(input,   output,    pitch);
+    vp8_short_fdct4x4_c(input + 4, output + 16, pitch);
 }
 
 void vp8_short_walsh4x4_c(short *input, short *output, int pitch) {
diff --git a/vp8/encoder/dct.h b/vp8/encoder/dct.h
index 6d2b736b3d5cbec96ebe6a4ac134df34198ff32f..ac7769d3d0be3095b76fcf35dbbaa65c79733a5a 100644
--- a/vp8/encoder/dct.h
+++ b/vp8/encoder/dct.h
@@ -23,6 +23,10 @@
 #endif
 
 
+#if CONFIG_HYBRIDTRANSFORM
+void vp8_fht4x4_c(short *input, short *output, int pitch, TX_TYPE tx_type);
+void vp8_fht8x4_c(short *input, short *output, int pitch, TX_TYPE tx_type);
+#endif
 
 #ifndef vp8_fdct_short8x8
 #define vp8_fdct_short8x8  vp8_short_fdct8x8_c
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index 7d145321883750bbf64584e6620f586c79676447..46b352ef8e59563c78a7b24cdb219b337be2b8ec 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -32,8 +32,11 @@ extern int enc_debug;
 #define IF_RTCD(x) NULL
 #endif
 
-int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_16x16_pred) {
+#if CONFIG_HYBRIDTRANSFORM
+extern void vp8_ht_quantize_b(BLOCK *b, BLOCKD *d);
+#endif
 
+int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_16x16_pred) {
   int i;
   int intra_pred_var = 0;
   (void) cpi;
@@ -64,6 +67,12 @@ void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *rtcd,
   BLOCKD *b = &x->e_mbd.block[ib];
   BLOCK *be = &x->block[ib];
 
+#if CONFIG_HYBRIDTRANSFORM
+    int QIndex = x->q_index;
+    int active_ht = (QIndex < ACTIVE_HT);
+#endif
+
+
 #if CONFIG_COMP_INTRA_PRED
   if (b->bmi.as_mode.second == (B_PREDICTION_MODE)(B_DC_PRED - 1)) {
 #endif
@@ -78,11 +87,45 @@ void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *rtcd,
 
   ENCODEMB_INVOKE(&rtcd->encodemb, subb)(be, b, 16);
 
-  x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
-
-  x->quantize_b(be, b);
-
-  vp8_inverse_transform_b(IF_RTCD(&rtcd->common->idct), b, 32);
+#if CONFIG_HYBRIDTRANSFORM
+    if(active_ht) {
+      b->bmi.as_mode.test = b->bmi.as_mode.first;
+      switch(b->bmi.as_mode.first) {
+        // case B_DC_PRED :
+        case B_TM_PRED :
+        case B_RD_PRED :
+          b->bmi.as_mode.tx_type = ADST_ADST;
+          break;
+
+        case B_VE_PRED :
+        case B_VR_PRED :
+          b->bmi.as_mode.tx_type = ADST_DCT;
+          break;
+
+        case B_HE_PRED :
+        case B_HD_PRED :
+        case B_HU_PRED :
+          b->bmi.as_mode.tx_type = DCT_ADST;
+          break;
+
+        default :
+          b->bmi.as_mode.tx_type = DCT_DCT;
+          break;
+      }
+
+      vp8_fht4x4_c(be->src_diff, be->coeff, 32, b->bmi.as_mode.tx_type);
+      vp8_ht_quantize_b(be, b);
+      vp8_inverse_htransform_b(IF_RTCD(&rtcd->common->idct), b, 32) ;
+    } else {
+      x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32) ;
+      x->quantize_b(be, b) ;
+      vp8_inverse_transform_b(IF_RTCD(&rtcd->common->idct), b, 32) ;
+    }
+#else
+    x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
+    x->quantize_b(be, b);
+    vp8_inverse_transform_b(IF_RTCD(&rtcd->common->idct), b, 32);
+#endif
 
   RECON_INVOKE(&rtcd->common->recon, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
 }
@@ -273,7 +316,6 @@ void vp8_encode_intra8x8(const VP8_ENCODER_RTCD *rtcd,
     RECON_INVOKE(&rtcd->common->recon, recon)(b->predictor,
                                               b->diff, *(b->base_dst) + b->dst, b->dst_stride);
   }
-
 }
 
 extern const int vp8_i8x8_block[4];
diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c
index 821507c62b019f52111edc088651702ea0b7ca5c..78892fc3f0acd88bfa1a6e69c7e63e4905e4c2b7 100644
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -22,6 +22,72 @@
 extern int enc_debug;
 #endif
 
+#if CONFIG_HYBRIDTRANSFORM  // quantizer for hybrid-transform blocks: scan order follows tx_type
+void vp8_ht_quantize_b(BLOCK *b, BLOCKD *d) {
+  int i, rc, eob;
+  int zbin;
+  int x, y, z, sz;
+  short *zbin_boost_ptr  = b->zrun_zbin_boost;
+  short *coeff_ptr       = b->coeff;
+  short *zbin_ptr        = b->zbin;
+  short *round_ptr       = b->round;
+  short *quant_ptr       = b->quant;
+  unsigned char *quant_shift_ptr = b->quant_shift;
+  short *qcoeff_ptr      = d->qcoeff;
+  short *dqcoeff_ptr     = d->dqcoeff;
+  short *dequant_ptr     = d->dequant;
+  short zbin_oq_value    = b->zbin_extra;
+
+  int const *pt_scan ;
+
+  // ADST_DCT uses the row scan, DCT_ADST the column scan, anything else zig-zag
+  switch(d->bmi.as_mode.tx_type) {
+    case ADST_DCT :
+      pt_scan = vp8_row_scan;
+      break;
+
+    case DCT_ADST :
+      pt_scan = vp8_col_scan;
+      break;
+
+    default :
+      pt_scan = vp8_default_zig_zag1d;
+      break;
+  }
+
+  // start from all-zero outputs (32 bytes each); only coefficients that pass the zbin test are written
+  vpx_memset(qcoeff_ptr, 0, 32);
+  vpx_memset(dqcoeff_ptr, 0, 32);
+
+  eob = -1;
+
+  for (i = 0; i < b->eob_max_offset; i++) {
+    rc   = pt_scan[i];
+    z    = coeff_ptr[rc];
+
+    // dead-zone threshold: base zbin + boost for the current zero run + extra
+    zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value;
+    zbin_boost_ptr ++;
+
+    sz = (z >> 31);                                 // sign of z
+    x  = (z ^ sz) - sz;                             // x = abs(z)
+
+    if (x >= zbin) {
+      x += round_ptr[rc];
+      y  = (((x * quant_ptr[rc]) >> 16) + x)
+           >> quant_shift_ptr[rc];                // quantize (x)
+      x  = (y ^ sz) - sz;                         // get the sign back
+      qcoeff_ptr[rc]  = x;                        // write to destination
+      dqcoeff_ptr[rc] = x * dequant_ptr[rc];      // dequantized value
+
+      if (y) {
+        eob = i;                                // last nonzero coeffs
+        zbin_boost_ptr = b->zrun_zbin_boost;    // reset zero runlength
+      }
+    }
+  }
+
+  // eob is one past the scan position of the last nonzero quantized coefficient
+  d->eob = eob + 1;
+}
+#endif
+
 void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d) {
   int i, rc, eob;
   int zbin;
@@ -47,13 +113,14 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d) {
     z    = coeff_ptr[rc];
 
     zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value;
-    zbin_boost_ptr++;
+    zbin_boost_ptr ++;
 
     sz = (z >> 31);                                 // sign of z
     x  = (z ^ sz) - sz;                             // x = abs(z)
 
     if (x >= zbin) {
       x += round_ptr[rc];
+
       y  = (((x * quant_ptr[rc]) >> 16) + x)
            >> quant_shift_ptr[rc];                // quantize (x)
       x  = (y ^ sz) - sz;                         // get the sign back
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 2c04b54b1a75da7cf4a1f8b448afc7f035350c79..761663682907bb5790d11679debbdd643d62c4b1 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -50,6 +50,10 @@
 extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x);
 extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x);
 
+#if CONFIG_HYBRIDTRANSFORM
+extern void vp8_ht_quantize_b(BLOCK *b, BLOCKD *d);
+#endif
+
 #if CONFIG_HIGH_PRECISION_MV
 #define XMVCOST (x->e_mbd.allow_high_precision_mv?x->mvcost_hp:x->mvcost)
 #else
@@ -545,9 +549,38 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a,
   int cost = 0;
   short *qcoeff_ptr = b->qcoeff;
 
-  VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
+#if CONFIG_HYBRIDTRANSFORM
+  int QIndex = mb->q_index;
+  int active_ht = (QIndex < ACTIVE_HT) &&
+                (mb->e_mbd.mode_info_context->mbmi.mode_rdopt == B_PRED);
+
+  int const *pt_scan;
+
+  if((type == PLANE_TYPE_Y_WITH_DC) && active_ht) {
+    switch (b->bmi.as_mode.tx_type) {
+      case ADST_DCT :
+        pt_scan = vp8_row_scan;
+        break;
+
+      case DCT_ADST :
+        pt_scan = vp8_col_scan;
+        break;
+
+      default :
+        pt_scan = vp8_default_zig_zag1d;
+        break;
+    }
+
+  } else {
+    pt_scan = vp8_default_zig_zag1d;
+  }
+
+#define  QC(I)  ( qcoeff_ptr [pt_scan[I]] )
+#else
+#define QC(I)  ( qcoeff_ptr [vp8_default_zig_zag1d[I]] )
+#endif
 
-# define QC( I)  ( qcoeff_ptr [vp8_default_zig_zag1d[I]] )
+    VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
 
   for (; c < eob; c++) {
     int v = QC(c);
@@ -804,11 +837,17 @@ static int rd_pick_intra4x4block(
   int *bmode_costs,
   ENTROPY_CONTEXT *a,
   ENTROPY_CONTEXT *l,
-
   int *bestrate,
   int *bestratey,
   int *bestdistortion) {
   B_PREDICTION_MODE mode;
+
+#if CONFIG_HYBRIDTRANSFORM
+  int QIndex = x->q_index;
+  int active_ht = (QIndex < ACTIVE_HT);
+  TX_TYPE best_tx_type;
+#endif
+
 #if CONFIG_COMP_INTRA_PRED
   B_PREDICTION_MODE mode2;
 #endif
@@ -828,7 +867,8 @@ static int rd_pick_intra4x4block(
 
   for (mode = B_DC_PRED; mode <= B_HU_PRED; mode++) {
 #if CONFIG_COMP_INTRA_PRED
-    for (mode2 = (allow_comp ? 0 : (B_DC_PRED - 1)); mode2 != (allow_comp ? (mode + 1) : 0); mode2++) {
+    for (mode2 = (allow_comp ? 0 : (B_DC_PRED - 1));
+                   mode2 != (allow_comp ? (mode + 1) : 0); mode2++) {
 #endif
       int this_rd;
       int ratey;
@@ -853,43 +893,95 @@ static int rd_pick_intra4x4block(
       }
 #endif
       ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), subb)(be, b, 16);
-      x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
-      x->quantize_b(be, b);
 
-      tempa = ta;
-      templ = tl;
+#if CONFIG_HYBRIDTRANSFORM
+      if(active_ht) {
+        b->bmi.as_mode.test = mode;
+        switch(mode) {
+          // B_DC_PRED is not listed here, so it takes the default (DCT_DCT)
+          case B_TM_PRED :
+          case B_RD_PRED :
+            b->bmi.as_mode.tx_type = ADST_ADST;
+            break;
+
+          case B_VE_PRED :
+          case B_VR_PRED :
+            b->bmi.as_mode.tx_type = ADST_DCT;
+            break;
+
+          case B_HE_PRED :
+          case B_HD_PRED :
+          case B_HU_PRED :
+            b->bmi.as_mode.tx_type = DCT_ADST;
+            break;
+
+          default :
+            b->bmi.as_mode.tx_type = DCT_DCT;
+            break;
+        }
 
-      ratey = cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, &tempa, &templ);
-      rate += ratey;
-      distortion = ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), berr)(
-                     be->coeff, b->dqcoeff) >> 2;
+        vp8_fht4x4_c(be->src_diff, be->coeff, 32, b->bmi.as_mode.tx_type);
+        vp8_ht_quantize_b(be, b);
+      } else {
+        x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
+        x->quantize_b(be, b);
+      }
+#else
+        x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
+        x->quantize_b(be, b);
+#endif
 
-      this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
+        tempa = ta;
+        templ = tl;
 
-      if (this_rd < best_rd) {
-        *bestrate = rate;
-        *bestratey = ratey;
-        *bestdistortion = distortion;
-        best_rd = this_rd;
-        *best_mode = mode;
-#if CONFIG_COMP_INTRA_PRED
-        *best_second_mode = mode2;
+        ratey = cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, &tempa, &templ);
+        rate += ratey;
+        distortion = ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), berr)(
+            be->coeff, b->dqcoeff) >> 2;
+
+        this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
+
+        if (this_rd < best_rd) {
+          *bestrate = rate;
+          *bestratey = ratey;
+          *bestdistortion = distortion;
+          best_rd = this_rd;
+          *best_mode = mode;
+#if CONFIG_HYBRIDTRANSFORM
+          best_tx_type = b->bmi.as_mode.tx_type ;
 #endif
-        *a = tempa;
-        *l = templ;
-        copy_predictor(best_predictor, b->predictor);
-        vpx_memcpy(best_dqcoeff, b->dqcoeff, 32);
+
 #if CONFIG_COMP_INTRA_PRED
-      }
+          *best_second_mode = mode2;
 #endif
+          *a = tempa;
+          *l = templ;
+          copy_predictor(best_predictor, b->predictor);
+          vpx_memcpy(best_dqcoeff, b->dqcoeff, 32);
+        }
+#if CONFIG_COMP_INTRA_PRED
     }
+#endif
   }
   b->bmi.as_mode.first = (B_PREDICTION_MODE)(*best_mode);
 #if CONFIG_COMP_INTRA_PRED
   b->bmi.as_mode.second = (B_PREDICTION_MODE)(*best_second_mode);
 #endif
 
+#if CONFIG_HYBRIDTRANSFORM
+  b->bmi.as_mode.tx_type = best_tx_type;
+
+  // inverse transform
+  if(active_ht) {
+    vp8_iht4x4llm_c(best_dqcoeff, b->diff, 32, b->bmi.as_mode.tx_type );
+  } else {
+    IDCT_INVOKE(IF_RTCD(&cpi->rtcd.common->idct), idct16)(best_dqcoeff,
+                                                                b->diff, 32);
+  }
+#else
   IDCT_INVOKE(IF_RTCD(&cpi->rtcd.common->idct), idct16)(best_dqcoeff, b->diff, 32);
+#endif
+
   RECON_INVOKE(IF_RTCD(&cpi->rtcd.common->recon), recon)(best_predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
 
   return best_rd;
@@ -1043,6 +1135,8 @@ static int rd_pick_intra16x16mby_mode(VP8_COMP *cpi,
 #endif
   return best_rd;
 }
+
+
 static int rd_pick_intra8x8block(
   VP8_COMP *cpi,
   MACROBLOCK *x,
@@ -2739,6 +2833,12 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
     xd->mode_info_context->mbmi.pred_filter_enabled = 0;
 #endif
 
+    // current coding mode under rate-distortion optimization test loop
+#if CONFIG_HYBRIDTRANSFORM
+    xd->mode_info_context->mbmi.mode_rdopt = this_mode;
+#endif
+
+
 #if CONFIG_COMP_INTRA_PRED
     xd->mode_info_context->mbmi.second_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
     xd->mode_info_context->mbmi.second_uv_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
@@ -3646,36 +3746,45 @@ int vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x) {
   MACROBLOCKD *xd = &x->e_mbd;
   int error4x4, error16x16;
 #if CONFIG_COMP_INTRA_PRED
-  int error4x4d, rate4x4d, dist4x4d;
+    int error4x4d, rate4x4d, dist4x4d;
 #endif
-  int rate4x4, rate16x16 = 0, rateuv;
-  int dist4x4, dist16x16, distuv;
-  int rate;
-  int rate4x4_tokenonly = 0;
-  int rate16x16_tokenonly = 0;
-  int rateuv_tokenonly = 0;
-  int error8x8, rate8x8_tokenonly = 0;
-  int rate8x8, dist8x8;
-  int mode16x16;
-  int mode8x8[2][4];
+    int rate4x4, rate16x16 = 0, rateuv;
+    int dist4x4, dist16x16, distuv;
+    int rate;
+    int rate4x4_tokenonly = 0;
+    int rate16x16_tokenonly = 0;
+    int rateuv_tokenonly = 0;
+    int error8x8, rate8x8_tokenonly=0;
+    int rate8x8, dist8x8;
+    int mode16x16;
+    int mode8x8[2][4];
+
+    xd->mode_info_context->mbmi.ref_frame = INTRA_FRAME;
 
-  xd->mode_info_context->mbmi.ref_frame = INTRA_FRAME;
+    rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv);
+    rate = rateuv;
 
-  rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv);
-  rate = rateuv;
+    // current macroblock under rate-distortion optimization test loop
+#if CONFIG_HYBRIDTRANSFORM
+    xd->mode_info_context->mbmi.mode_rdopt = DC_PRED;
+#endif
 
-  error16x16 = rd_pick_intra16x16mby_mode(cpi, x,
-                                          &rate16x16, &rate16x16_tokenonly,
-                                          &dist16x16);
-  mode16x16 = xd->mode_info_context->mbmi.mode;
+    error16x16 = rd_pick_intra16x16mby_mode(cpi, x,
+                                            &rate16x16, &rate16x16_tokenonly,
+                                            &dist16x16);
+    mode16x16 = xd->mode_info_context->mbmi.mode;
 
-  error8x8 = rd_pick_intra8x8mby_modes(cpi, x,
-                                       &rate8x8, &rate8x8_tokenonly,
-                                       &dist8x8, error16x16);
-  mode8x8[0][0] = xd->mode_info_context->bmi[0].as_mode.first;
-  mode8x8[0][1] = xd->mode_info_context->bmi[2].as_mode.first;
-  mode8x8[0][2] = xd->mode_info_context->bmi[8].as_mode.first;
-  mode8x8[0][3] = xd->mode_info_context->bmi[10].as_mode.first;
+#if CONFIG_HYBRIDTRANSFORM
+    xd->mode_info_context->mbmi.mode_rdopt = I8X8_PRED;
+#endif
+
+    error8x8 = rd_pick_intra8x8mby_modes(cpi, x,
+                &rate8x8, &rate8x8_tokenonly,
+                &dist8x8, error16x16);
+    mode8x8[0][0]= xd->mode_info_context->bmi[0].as_mode.first;
+    mode8x8[0][1]= xd->mode_info_context->bmi[2].as_mode.first;
+    mode8x8[0][2]= xd->mode_info_context->bmi[8].as_mode.first;
+    mode8x8[0][3]= xd->mode_info_context->bmi[10].as_mode.first;
 #if CONFIG_COMP_INTRA_PRED
   mode8x8[1][0] = xd->mode_info_context->bmi[0].as_mode.second;
   mode8x8[1][1] = xd->mode_info_context->bmi[2].as_mode.second;
@@ -3683,9 +3792,13 @@ int vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x) {
   mode8x8[1][3] = xd->mode_info_context->bmi[10].as_mode.second;
 #endif
 
-  error4x4 = rd_pick_intra4x4mby_modes(cpi, x,
-                                       &rate4x4, &rate4x4_tokenonly,
-                                       &dist4x4, error16x16,
+#if CONFIG_HYBRIDTRANSFORM
+    xd->mode_info_context->mbmi.mode_rdopt = B_PRED;
+#endif
+
+    error4x4 = rd_pick_intra4x4mby_modes(cpi, x,
+                                         &rate4x4, &rate4x4_tokenonly,
+                                         &dist4x4, error16x16,
 #if CONFIG_COMP_INTRA_PRED
                                        0,
 #endif
diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c
index 2d58669e08d8df342481677038afc5db3baac67a..b00c5b2bf47c549e4672859d6745fa4f6e56e182 100644
--- a/vp8/encoder/tokenize.c
+++ b/vp8/encoder/tokenize.c
@@ -298,6 +298,164 @@ static void tokenize1st_order_b_8x8
   *a = *l = pt;
 }
 
+#if CONFIG_HYBRIDTRANSFORM
+/* Tokenize the 16 luma and 8 chroma 4x4 blocks of one macroblock for the
+ * hybrid-transform path. Tokens are appended at *tp, which is advanced past
+ * them; cpi->coef_counts and the above/left entropy contexts are updated.
+ * Luma blocks of a B_PRED macroblock coded as PLANE_TYPE_Y_WITH_DC use a
+ * scan order chosen from the block's 4x4 prediction mode; every other block
+ * uses vp8_default_zig_zag1d. */
+static void tokenize1st_order_ht(   MACROBLOCKD *xd,
+                                    TOKENEXTRA **tp,
+                                    int type,
+                                    VP8_COMP    *cpi) {
+  unsigned int block;
+  const BLOCKD *b;
+  int pt;             /* near block/prev token context index */
+  int c;
+  int token;
+  TOKENEXTRA *t = *tp;/* store tokens starting here */
+  const short *qcoeff_ptr;
+  ENTROPY_CONTEXT * a;
+  ENTROPY_CONTEXT * l;
+  int band, rc, v;
+  int const *pt_scan ;
+  int seg_eob = 16;
+  int segment_id = xd->mode_info_context->mbmi.segment_id;
+
+  if ( segfeature_active( xd, segment_id, SEG_LVL_EOB ) ) {
+    seg_eob = get_segdata( xd, segment_id, SEG_LVL_EOB );
+  }
+
+  b = xd->block;
+
+  /* Luma */
+  for (block = 0; block < 16; block++, b++) {
+    // Only B_PRED supplies a per-block 4x4 mode. Other macroblock modes
+    // (I8X8_PRED included) keep B_DC_PRED, which selects the zig-zag scan.
+    B_PREDICTION_MODE b_mode = B_DC_PRED;
+    if( xd->mode_info_context->mbmi.mode == B_PRED ) {
+      b_mode = b->bmi.as_mode.first;
+    }
+
+    // assign scanning order for luma components coded in intra4x4 mode
+    if( ( ( xd->mode_info_context->mbmi.mode == B_PRED ) ||
+          ( xd->mode_info_context->mbmi.mode == I8X8_PRED ) ) &&
+        ( type == PLANE_TYPE_Y_WITH_DC) ) {
+      switch(b_mode) {
+        case B_VE_PRED :
+        case B_VR_PRED :
+          pt_scan = vp8_row_scan;
+          break;
+
+        case B_HE_PRED :
+        case B_HD_PRED :
+        case B_HU_PRED :
+          pt_scan = vp8_col_scan;
+          break;
+
+        default :
+          pt_scan = vp8_default_zig_zag1d;
+          break;
+      }
+    } else {
+      pt_scan = vp8_default_zig_zag1d;
+    }
+
+    qcoeff_ptr = b->qcoeff;
+    a = (ENTROPY_CONTEXT *)xd->above_context + vp8_block2above[block];
+    l = (ENTROPY_CONTEXT *)xd->left_context + vp8_block2left[block];
+    VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
+
+    c = type ? 0 : 1;
+
+    for (; c < b->eob; c++) {
+      rc = pt_scan[c];
+      band = vp8_coef_bands[c];
+      v = qcoeff_ptr[rc];
+
+      t->Extra = vp8_dct_value_tokens_ptr[v].Extra;
+      token    = vp8_dct_value_tokens_ptr[v].Token;
+
+      t->Token = token;
+      t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt];
+
+      t->skip_eob_node = pt == 0 &&
+          ((band > 0 && type > 0) || (band > 1 && type == 0));
+
+      ++cpi->coef_counts       [type] [band] [pt] [token];
+
+      pt = vp8_prev_token_class[token];
+      t++;
+    }
+
+    if (c < seg_eob) {
+      band = vp8_coef_bands[c];
+      t->Token = DCT_EOB_TOKEN;
+      t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt];
+
+      t->skip_eob_node = pt == 0 &&
+          ((band > 0 && type > 0) || (band > 1 && type == 0));
+
+      ++cpi->coef_counts       [type] [band] [pt] [DCT_EOB_TOKEN];
+
+      t++;
+    }
+
+    *tp = t;
+    pt = (c != !type); /* 0 <-> all coeff data is zero */
+    *a = *l = pt;
+  }
+
+  // reset scanning order for chroma components
+  pt_scan = vp8_default_zig_zag1d ;
+
+  /* Chroma */
+  for (block = 16; block < 24; block++, b++) {
+    qcoeff_ptr = b->qcoeff;
+    a = (ENTROPY_CONTEXT *)xd->above_context + vp8_block2above[block];
+    l = (ENTROPY_CONTEXT *)xd->left_context + vp8_block2left[block];
+
+    VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
+
+    for (c = 0; c < b->eob; c++) {
+      rc = pt_scan[c];
+      band = vp8_coef_bands[c];
+      v = qcoeff_ptr[rc];
+
+      t->Extra = vp8_dct_value_tokens_ptr[v].Extra;
+      token    = vp8_dct_value_tokens_ptr[v].Token;
+
+      t->Token = token;
+      t->context_tree = cpi->common.fc.coef_probs [2] [band] [pt];
+
+      t->skip_eob_node = ((pt == 0) && (band > 0));
+
+      ++cpi->coef_counts       [2] [band] [pt] [token];
+
+      pt = vp8_prev_token_class[token];
+      t++;
+    }
+
+    if (c < seg_eob) {
+      band = vp8_coef_bands[c];
+      t->Token = DCT_EOB_TOKEN;
+      t->context_tree = cpi->common.fc.coef_probs [2] [band] [pt];
+
+      t->skip_eob_node = ((pt == 0) && (band > 0));
+
+      ++cpi->coef_counts       [2] [band] [pt] [DCT_EOB_TOKEN];
+
+      t++;
+    }
+
+    *tp = t;
+    pt = (c != 0); /* 0 <-> all coeff data is zero */
+    *a = *l = pt;
+  }
+}
+#endif
+
 static void tokenize1st_order_b
 (
   MACROBLOCKD *xd,
@@ -483,6 +641,11 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) {
   int skip_inc;
   int segment_id = x->mode_info_context->mbmi.segment_id;
 
+#if CONFIG_HYBRIDTRANSFORM
+    int QIndex = cpi->mb.q_index;
+    int active_ht = (QIndex < ACTIVE_HT);
+#endif
+
   if (!segfeature_active(x, segment_id, SEG_LVL_EOB) ||
       (get_segdata(x, segment_id, SEG_LVL_EOB) != 0)) {
     skip_inc = 1;
@@ -560,9 +723,17 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) {
       *(A + vp8_block2above_8x8[b] + 1) = *(A + vp8_block2above_8x8[b]);
       *(L + vp8_block2left_8x8[b] + 1) = *(L + vp8_block2left_8x8[b]);
     }
-  } else
-
+  } else {
+#if CONFIG_HYBRIDTRANSFORM
+    if(active_ht) {
+      tokenize1st_order_ht(x, t, plane_type, cpi);
+    } else {
+      tokenize1st_order_b(x, t, plane_type, cpi);
+    }
+#else
     tokenize1st_order_b(x, t, plane_type, cpi);
+#endif
+  }
 }