From de6dfa6bb0cd680ca446a15b52b2a026859eb1e6 Mon Sep 17 00:00:00 2001
From: Jingning Han <jingning@google.com>
Date: Wed, 29 Aug 2012 11:25:38 -0700
Subject: [PATCH] hybrid transform of 16x16 dimension

Enable ADST/DCT of dimension 16x16 for I16X16 modes. This change provides
benefits mostly for hd sequences.

Set up the framework for selectable transform dimension.

Also allowing quantization parameter threshold to control the use
of hybrid transform (This is currently disabled by setting threshold
always above the quantization parameter. Adaptive thresholding can
be built upon this, which will further improve the coding performance.)

The coding performance gains (with respect to the codec that has all
other configuration settings turned on) are

derf:   0.013
yt:     0.086
hd:     0.198
std-hd: 0.501

Change-Id: Ibb4263a61fc74e0b3c345f54d73e8c73552bf926
---
 configure                              |   1 +
 vp8/common/blockd.h                    |  21 ++-
 vp8/common/coefupdateprobs.h           |   2 +-
 vp8/common/default_coef_probs.h        | 193 +++++++++++++------------
 vp8/common/entropy.c                   |   6 +-
 vp8/common/entropy.h                   |   5 +-
 vp8/common/generic/systemdependent.c   |   2 +-
 vp8/common/idct.h                      |   6 +-
 vp8/common/idctllm.c                   |  98 +++++++++++--
 vp8/common/invtrans.c                  |   2 +-
 vp8/common/invtrans.h                  |   2 +-
 vp8/common/loopfilter.c                |   8 +-
 vp8/common/onyxc_int.h                 |   6 +-
 vp8/common/reconinter.c                |   1 -
 vp8/decoder/decodframe.c               |  47 ++++--
 vp8/decoder/dequantize.c               |  43 +++++-
 vp8/decoder/dequantize.h               |  10 +-
 vp8/decoder/detokenize.c               |  13 +-
 vp8/decoder/detokenize.h               |   2 +-
 vp8/decoder/generic/dsystemdependent.c |   2 +-
 vp8/decoder/onyxd_int.h                |   2 +-
 vp8/encoder/bitstream.c                |  12 +-
 vp8/encoder/block.h                    |  10 +-
 vp8/encoder/dct.c                      |  98 +++++++++++--
 vp8/encoder/dct.h                      |   6 +-
 vp8/encoder/encodeframe.c              |   6 +-
 vp8/encoder/encodeintra.c              |  37 ++++-
 vp8/encoder/encodemb.c                 |  16 +-
 vp8/encoder/encodemb.h                 |   2 +-
 vp8/encoder/generic/csystemdependent.c |   2 +-
 vp8/encoder/onyx_if.c                  |   8 +-
 vp8/encoder/onyx_int.h                 |   8 +-
 vp8/encoder/quantize.c                 |  34 ++---
 vp8/encoder/quantize.h                 |   4 +-
 vp8/encoder/ratectrl.c                 |   4 +-
 vp8/encoder/rdopt.c                    |  96 ++++++++++--
 vp8/encoder/tokenize.c                 |  38 ++---
 vp8/encoder/tokenize.h                 |   2 +-
 38 files changed, 600 insertions(+), 255 deletions(-)

diff --git a/configure b/configure
index 5c9e120679..2180ae37f3 100755
--- a/configure
+++ b/configure
@@ -227,6 +227,7 @@ EXPERIMENT_LIST="
     tx16x16
     newbestrefmv
     new_mvref
+    hybridtransform16x16
 "
 CONFIG_LIST="
     external_build
diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index 940b0b4f80..9862b28e58 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -133,13 +133,13 @@ typedef enum {
 typedef enum {
   TX_4X4,                      // 4x4 dct transform
   TX_8X8,                      // 8x8 dct transform
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   TX_16X16,                    // 16x16 dct transform
 #endif
   TX_SIZE_MAX                  // Number of different transforms available
 } TX_SIZE;
 
-#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM
+#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
 typedef enum {
   DCT_DCT   = 0,                      // DCT  in both horizontal and vertical
   ADST_DCT  = 1,                      // ADST in horizontal, DCT in vertical
@@ -155,10 +155,14 @@ typedef enum {
 
 #define VP8_MVREFS (1 + SPLITMV - NEARESTMV)
 
-#if CONFIG_HYBRIDTRANSFORM
+#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
 #define ACTIVE_HT 110                // quantization stepsize threshold
 #endif
 
+#if CONFIG_HYBRIDTRANSFORM16X16
+#define ACTIVE_HT16 300
+#endif
+
 typedef enum {
   B_DC_PRED,          /* average of above and left pixels */
   B_TM_PRED,
@@ -190,7 +194,7 @@ typedef enum {
 } MV_REF_TYPE;
 #endif
 
-#if CONFIG_HYBRIDTRANSFORM8X8
+#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16
 // convert MB_PREDICTION_MODE to B_PREDICTION_MODE
 static B_PREDICTION_MODE pred_mode_conv(MB_PREDICTION_MODE mode) {
   B_PREDICTION_MODE b_mode;
@@ -244,7 +248,7 @@ static B_PREDICTION_MODE pred_mode_conv(MB_PREDICTION_MODE mode) {
 union b_mode_info {
   struct {
     B_PREDICTION_MODE first;
-#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM
+#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
     B_PREDICTION_MODE test;
     TX_TYPE           tx_type;
 #endif
@@ -269,7 +273,7 @@ typedef enum {
 
 typedef struct {
   MB_PREDICTION_MODE mode, uv_mode;
-#if CONFIG_HYBRIDTRANSFORM
+#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16
   MB_PREDICTION_MODE mode_rdopt;
 #endif
 
@@ -457,15 +461,16 @@ typedef struct MacroBlockD {
 #endif
 #endif
 
-#if CONFIG_HYBRIDTRANSFORM
+#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16
   int q_index;
 #endif
 
 } MACROBLOCKD;
 
-#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM
+#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16
 // transform mapping
 static void txfm_map(BLOCKD *b, B_PREDICTION_MODE bmode) {
+  // map transform type
   switch (bmode) {
     case B_TM_PRED :
     case B_RD_PRED :
diff --git a/vp8/common/coefupdateprobs.h b/vp8/common/coefupdateprobs.h
index 0fb25cc945..0610356cc0 100644
--- a/vp8/common/coefupdateprobs.h
+++ b/vp8/common/coefupdateprobs.h
@@ -13,7 +13,7 @@
    Generated file included by entropy.c */
 #define COEF_UPDATE_PROB 252
 #define COEF_UPDATE_PROB_8X8 252
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 #define COEF_UPDATE_PROB_16X16 252
 #endif
 
diff --git a/vp8/common/default_coef_probs.h b/vp8/common/default_coef_probs.h
index 940e971b78..3fcacb2789 100644
--- a/vp8/common/default_coef_probs.h
+++ b/vp8/common/default_coef_probs.h
@@ -488,7 +488,7 @@ vp8_default_coef_probs_8x8[BLOCK_TYPES_8X8]
   }
 #endif
 };
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 static const vp8_prob
 vp8_default_coef_probs_16x16[BLOCK_TYPES_16X16]
                             [COEF_BANDS]
@@ -546,103 +546,104 @@ vp8_default_coef_probs_16x16[BLOCK_TYPES_16X16]
     }
   },
   { /* block Type 1 */
-    { /* Coeff Band 0 */
-      { 134, 152, 233, 224, 234, 52, 255, 166, 128, 128, 128},
-      { 97, 132, 185, 234, 186, 189, 197, 171, 255, 212, 128},
-      { 84, 110, 185, 237, 182, 182, 145, 145, 255, 255, 128}
-    },
-    { /* Coeff Band 1 */
-      { 1, 124, 213, 247, 192, 212, 255, 255, 128, 128, 128},
-      { 88, 111, 178, 254, 189, 211, 255, 255, 128, 128, 128},
-      { 12, 59, 129, 236, 150, 179, 239, 195, 255, 255, 128},
-      { 12, 59, 129, 236, 150, 179, 239, 195, 255, 255, 128}
-    },
-    { /* Coeff Band 2 */
-      { 1, 102, 225, 255, 210, 240, 128, 128, 128, 128, 128},
-      { 110, 78, 195, 254, 200, 191, 255, 255, 128, 128, 128},
-      { 37, 63, 177, 255, 194, 195, 128, 128, 128, 128, 128},
-      { 37, 63, 177, 255, 194, 195, 128, 128, 128, 128, 128}
-    },
-    { /* Coeff Band 3 */
-      { 1, 1, 229, 255, 202, 224, 128, 128, 128, 128, 128},
-      { 150, 1, 192, 255, 206, 226, 128, 128, 128, 128, 128},
-      { 75, 1, 138, 255, 172, 228, 128, 128, 128, 128, 128},
-      { 75, 1, 138, 255, 172, 228, 128, 128, 128, 128, 128}
-    },
-    { /* Coeff Band 4 */
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}
-    },
-    { /* Coeff Band 5 */
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}
-    },
-    { /* Coeff Band 6 */
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}
-    },
-    { /* Coeff Band 7 */
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}
-    }
+      { /* Coeff Band 0 */
+        { 1, 30, 103, 204, 142, 168, 235, 161, 255, 228, 128},
+        { 1, 35, 90, 192, 130, 161, 227, 158, 255, 226, 255},
+        { 1, 36, 78, 180, 122, 156, 221, 153, 255, 222, 255},
+        { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}
+      },
+      { /* Coeff Band 1 */
+        { 1, 163, 228, 253, 212, 194, 255, 205, 128, 128, 128},
+        { 67, 160, 226, 253, 210, 202, 245, 172, 255, 255, 128},
+        { 51, 147, 219, 251, 207, 207, 255, 217, 128, 128, 128},
+        { 25, 107, 175, 245, 183, 190, 254, 209, 255, 255, 128}
+      },
+      { /* Coeff Band 2 */
+        { 1, 66, 170, 240, 177, 186, 252, 203, 255, 245, 128},
+        { 23, 64, 145, 230, 161, 177, 252, 198, 255, 255, 128},
+        { 6, 51, 99, 208, 135, 163, 249, 178, 255, 248, 128},
+        { 1, 33, 59, 161, 104, 151, 238, 164, 255, 237, 128}
+      },
+      { /* Coeff Band 3 */
+        { 1, 76, 216, 250, 198, 199, 255, 226, 255, 255, 128},
+        { 86, 83, 200, 247, 189, 193, 255, 224, 255, 255, 128},
+        { 30, 75, 164, 242, 172, 184, 254, 218, 255, 255, 128},
+        { 3, 54, 103, 227, 140, 172, 253, 201, 255, 255, 128}
+      },
+      { /* Coeff Band 4 */
+        { 1, 241, 247, 255, 233, 223, 255, 255, 128, 128, 128},
+        { 78, 212, 242, 255, 226, 230, 255, 255, 128, 128, 128},
+        { 10, 167, 224, 255, 217, 225, 255, 128, 128, 128, 128},
+        { 1, 104, 176, 250, 166, 219, 255, 255, 128, 128, 128}
+      },
+      { /* Coeff Band 5 */
+        { 1, 194, 241, 254, 228, 214, 248, 237, 255, 255, 128},
+        { 95, 133, 228, 254, 218, 215, 255, 229, 128, 128, 128},
+        { 24, 119, 201, 252, 202, 205, 255, 229, 128, 128, 128},
+        { 1, 88, 155, 246, 183, 193, 255, 205, 128, 128, 128}
+      },
+      { /* Coeff Band 6 */
+        { 1, 204, 236, 255, 222, 220, 255, 239, 128, 128, 128},
+        { 126, 105, 225, 254, 214, 217, 255, 254, 128, 128, 128},
+        { 44, 86, 190, 251, 197, 204, 255, 233, 128, 128, 128},
+        { 6, 71, 130, 240, 164, 188, 255, 246, 128, 128, 128}
+      },
+      { /* Coeff Band 7 */
+        { 1, 195, 250, 255, 239, 197, 128, 128, 128, 128, 128},
+        { 167, 102, 249, 255, 234, 255, 128, 128, 128, 128, 128},
+        { 65, 91, 222, 255, 217, 255, 128, 128, 128, 128, 128},
+        { 1, 59, 128, 255, 154, 255, 128, 128, 128, 128, 128}
+      }
   },
   { /* block Type 2 */
-    { /* Coeff Band 0 */
-      { 11, 181, 226, 199, 183, 255, 255, 255, 128, 128, 128},
-      { 2, 147, 185, 248, 163, 180, 255, 236, 128, 128, 128},
-      { 1, 123, 157, 238, 154, 176, 255, 226, 255, 255, 128},
-      { 1, 123, 157, 238, 154, 176, 255, 226, 255, 255, 128}
-    },
-    { /* Coeff Band 1 */
-      { 1, 150, 191, 246, 174, 188, 255, 235, 128, 128, 128},
-      { 1, 125, 166, 245, 165, 185, 255, 234, 128, 128, 128},
-      { 1, 79, 125, 240, 148, 179, 255, 234, 255, 255, 128},
-      { 1, 79, 125, 240, 148, 179, 255, 234, 255, 255, 128}
-    },
-    { /* Coeff Band 2 */
-      { 1, 146, 184, 242, 167, 183, 255, 230, 255, 255, 128},
-      { 1, 119, 160, 239, 156, 178, 255, 231, 255, 255, 128},
-      { 1, 75, 115, 234, 142, 173, 255, 225, 255, 255, 128},
-      { 1, 75, 115, 234, 142, 173, 255, 225, 255, 255, 128}
-    },
-    { /* Coeff Band 3 */
-      { 1, 150, 188, 244, 169, 183, 255, 233, 255, 255, 128},
-      { 1, 123, 162, 243, 161, 180, 255, 233, 128, 128, 128},
-      { 1, 76, 120, 238, 148, 178, 255, 230, 255, 255, 128},
-      { 1, 76, 120, 238, 148, 178, 255, 230, 255, 255, 128}
-    },
-    { /* Coeff Band 4 */
-      { 1, 163, 202, 252, 188, 204, 255, 248, 128, 128, 128},
-      { 1, 136, 180, 251, 181, 201, 255, 246, 128, 128, 128},
-      { 1, 92, 146, 249, 170, 197, 255, 245, 128, 128, 128},
-      { 1, 92, 146, 249, 170, 197, 255, 245, 128, 128, 128}
-    },
-    { /* Coeff Band 5 */
-      { 1, 156, 195, 249, 179, 193, 255, 241, 255, 255, 128},
-      { 1, 128, 169, 248, 171, 192, 255, 242, 255, 255, 128},
-      { 1, 84, 132, 245, 158, 187, 255, 240, 255, 255, 128},
-      { 1, 84, 132, 245, 158, 187, 255, 240, 255, 255, 128}
-    },
-    { /* Coeff Band 6 */
-      { 1, 36, 71, 251, 192, 201, 255, 243, 255, 255, 128},
-      { 1, 49, 185, 250, 184, 199, 255, 242, 128, 128, 128},
-      { 1, 95, 147, 247, 168, 190, 255, 239, 255, 255, 128},
-      { 1, 95, 147, 247, 168, 190, 255, 239, 255, 255, 128}
-    },
-    { /* Coeff Band 7 */
-      { 1, 19, 98, 255, 218, 222, 255, 255, 128, 128, 128},
-      { 36, 50, 210, 255, 212, 221, 255, 255, 128, 128, 128},
-      { 6, 117, 180, 254, 199, 216, 255, 251, 128, 128, 128},
-      { 6, 117, 180, 254, 199, 216, 255, 251, 128, 128, 128}
-    }
+      { /* Coeff Band 0 */
+        { 1, 30, 103, 204, 142, 168, 235, 161, 255, 228, 128},
+        { 1, 35, 90, 192, 130, 161, 227, 158, 255, 226, 255},
+        { 1, 36, 78, 180, 122, 156, 221, 153, 255, 222, 255},
+        { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}
+      },
+      { /* Coeff Band 1 */
+        { 1, 163, 228, 253, 212, 194, 255, 205, 128, 128, 128},
+        { 67, 160, 226, 253, 210, 202, 245, 172, 255, 255, 128},
+        { 51, 147, 219, 251, 207, 207, 255, 217, 128, 128, 128},
+        { 25, 107, 175, 245, 183, 190, 254, 209, 255, 255, 128}
+      },
+      { /* Coeff Band 2 */
+        { 1, 66, 170, 240, 177, 186, 252, 203, 255, 245, 128},
+        { 23, 64, 145, 230, 161, 177, 252, 198, 255, 255, 128},
+        { 6, 51, 99, 208, 135, 163, 249, 178, 255, 248, 128},
+        { 1, 33, 59, 161, 104, 151, 238, 164, 255, 237, 128}
+      },
+      { /* Coeff Band 3 */
+        { 1, 76, 216, 250, 198, 199, 255, 226, 255, 255, 128},
+        { 86, 83, 200, 247, 189, 193, 255, 224, 255, 255, 128},
+        { 30, 75, 164, 242, 172, 184, 254, 218, 255, 255, 128},
+        { 3, 54, 103, 227, 140, 172, 253, 201, 255, 255, 128}
+      },
+      { /* Coeff Band 4 */
+        { 1, 241, 247, 255, 233, 223, 255, 255, 128, 128, 128},
+        { 78, 212, 242, 255, 226, 230, 255, 255, 128, 128, 128},
+        { 10, 167, 224, 255, 217, 225, 255, 128, 128, 128, 128},
+        { 1, 104, 176, 250, 166, 219, 255, 255, 128, 128, 128}
+      },
+      { /* Coeff Band 5 */
+        { 1, 194, 241, 254, 228, 214, 248, 237, 255, 255, 128},
+        { 95, 133, 228, 254, 218, 215, 255, 229, 128, 128, 128},
+        { 24, 119, 201, 252, 202, 205, 255, 229, 128, 128, 128},
+        { 1, 88, 155, 246, 183, 193, 255, 205, 128, 128, 128}
+      },
+      { /* Coeff Band 6 */
+        { 1, 204, 236, 255, 222, 220, 255, 239, 128, 128, 128},
+        { 126, 105, 225, 254, 214, 217, 255, 254, 128, 128, 128},
+        { 44, 86, 190, 251, 197, 204, 255, 233, 128, 128, 128},
+        { 6, 71, 130, 240, 164, 188, 255, 246, 128, 128, 128}
+      },
+      { /* Coeff Band 7 */
+        { 1, 195, 250, 255, 239, 197, 128, 128, 128, 128, 128},
+        { 167, 102, 249, 255, 234, 255, 128, 128, 128, 128, 128},
+        { 65, 91, 222, 255, 217, 255, 128, 128, 128, 128, 128},
+        { 1, 59, 128, 255, 154, 255, 128, 128, 128, 128, 128}
+      }
   },
   { /* block Type 3 */
     { /* Coeff Band 0 */
diff --git a/vp8/common/entropy.c b/vp8/common/entropy.c
index cbe798289f..9ccb18554b 100644
--- a/vp8/common/entropy.c
+++ b/vp8/common/entropy.c
@@ -95,7 +95,7 @@ DECLARE_ALIGNED(64, const int, vp8_default_zig_zag1d_8x8[64]) = {
   58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63,
 };
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 // Table can be optimized.
 DECLARE_ALIGNED(16, const int, vp8_coef_bands_16x16[256]) = {
     0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, 6,
@@ -212,7 +212,7 @@ void vp8_default_coef_probs(VP8_COMMON *pc) {
   vpx_memcpy(pc->fc.coef_probs_8x8, vp8_default_coef_probs_8x8,
              sizeof(pc->fc.coef_probs_8x8));
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   vpx_memcpy(pc->fc.coef_probs_16x16, vp8_default_coef_probs_16x16,
              sizeof(pc->fc.coef_probs_16x16));
 #endif
@@ -335,7 +335,7 @@ void vp8_adapt_coef_probs(VP8_COMMON *cm) {
         }
       }
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   for (i = 0; i < BLOCK_TYPES_16X16; ++i)
     for (j = 0; j < COEF_BANDS; ++j)
       for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
diff --git a/vp8/common/entropy.h b/vp8/common/entropy.h
index a435448e60..f82df96814 100644
--- a/vp8/common/entropy.h
+++ b/vp8/common/entropy.h
@@ -67,6 +67,7 @@ extern vp8_extra_bit_struct vp8_extra_bits[12];    /* indexed by token value */
 #else
 #define BLOCK_TYPES_8X8 3
 #endif
+
 #define BLOCK_TYPES_16X16 4
 
 /* Middle dimension is a coarsening of the coefficient's
@@ -75,7 +76,7 @@ extern vp8_extra_bit_struct vp8_extra_bits[12];    /* indexed by token value */
 #define COEF_BANDS 8
 extern DECLARE_ALIGNED(16, const int, vp8_coef_bands[16]);
 extern DECLARE_ALIGNED(64, const int, vp8_coef_bands_8x8[64]);
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 extern DECLARE_ALIGNED(16, const int, vp8_coef_bands_16x16[256]);
 #endif
 
@@ -118,7 +119,7 @@ extern short vp8_default_zig_zag_mask[16];
 extern DECLARE_ALIGNED(64, const int, vp8_default_zig_zag1d_8x8[64]);
 void vp8_coef_tree_initialize(void);
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 extern DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d_16x16[256]);
 #endif
 void vp8_adapt_coef_probs(struct VP8Common *);
diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c
index d28024cda5..32b5e5a6c0 100644
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@@ -33,7 +33,7 @@ void vp8_machine_specific_config(VP8_COMMON *ctx) {
   rtcd->idct.idct8        = vp8_short_idct8x8_c;
   rtcd->idct.idct1_scalar_add_8x8 = vp8_dc_only_idct_add_8x8_c;
   rtcd->idct.ihaar2       = vp8_short_ihaar2x2_c;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   rtcd->idct.idct16x16    = vp8_short_idct16x16_c;
 #endif
   rtcd->recon.copy16x16   = vp8_copy_mem16x16_c;
diff --git a/vp8/common/idct.h b/vp8/common/idct.h
index 2a410c34ee..a4246c2a79 100644
--- a/vp8/common/idct.h
+++ b/vp8/common/idct.h
@@ -43,7 +43,7 @@
 #define Y2_WHT_UPSCALE_FACTOR 2
 #endif
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 #ifndef vp8_idct_idct16x16
 #define vp8_idct_idct16x16 vp8_short_idct16x16_c
 #endif
@@ -111,7 +111,7 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_lossless_c);
 extern prototype_second_order(vp8_short_inv_walsh4x4_1_lossless_c);
 #endif
 
-#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM
+#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
 #include "vp8/common/blockd.h"
 void vp8_ihtllm_c(short *input, short *output, int pitch,
                   TX_TYPE tx_type, int tx_dim);
@@ -136,7 +136,7 @@ typedef struct {
   vp8_idct_fn_t ihaar2;
   vp8_idct_fn_t ihaar2_1;
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   vp8_idct_fn_t            idct16x16;
 #endif
 } vp8_idct_rtcd_vtable_t;
diff --git a/vp8/common/idctllm.c b/vp8/common/idctllm.c
index bf019af065..b4475c628a 100644
--- a/vp8/common/idctllm.c
+++ b/vp8/common/idctllm.c
@@ -37,7 +37,7 @@ static const int rounding = 0;
 
 // TODO: these transforms can be further converted into integer forms
 //       for complexity optimization
-#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8
+#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16
 float idct_4[16] = {
   0.500000000000000,   0.653281482438188,   0.500000000000000,   0.270598050073099,
   0.500000000000000,   0.270598050073099,  -0.500000000000000,  -0.653281482438188,
@@ -89,11 +89,85 @@ float iadst_8[64] = {
   0.483002021635509,  -0.466553967085785,   0.434217976756762,  -0.387095214016348,
   0.326790388032145,  -0.255357107325375,   0.175227946595736,  -0.089131608307532
 };
+#endif
+
+#if CONFIG_HYBRIDTRANSFORM16X16 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8
+float idct_16[256] = {
+  0.250000,  0.351851,  0.346760,  0.338330,  0.326641,  0.311806,  0.293969,  0.273300,
+  0.250000,  0.224292,  0.196424,  0.166664,  0.135299,  0.102631,  0.068975,  0.034654,
+  0.250000,  0.338330,  0.293969,  0.224292,  0.135299,  0.034654, -0.068975, -0.166664,
+ -0.250000, -0.311806, -0.346760, -0.351851, -0.326641, -0.273300, -0.196424, -0.102631,
+  0.250000,  0.311806,  0.196424,  0.034654, -0.135299, -0.273300, -0.346760, -0.338330,
+ -0.250000, -0.102631,  0.068975,  0.224292,  0.326641,  0.351851,  0.293969,  0.166664,
+  0.250000,  0.273300,  0.068975, -0.166664, -0.326641, -0.338330, -0.196424,  0.034654,
+  0.250000,  0.351851,  0.293969,  0.102631, -0.135299, -0.311806, -0.346760, -0.224292,
+  0.250000,  0.224292, -0.068975, -0.311806, -0.326641, -0.102631,  0.196424,  0.351851,
+  0.250000, -0.034654, -0.293969, -0.338330, -0.135299,  0.166664,  0.346760,  0.273300,
+  0.250000,  0.166664, -0.196424, -0.351851, -0.135299,  0.224292,  0.346760,  0.102631,
+ -0.250000, -0.338330, -0.068975,  0.273300,  0.326641,  0.034654, -0.293969, -0.311806,
+  0.250000,  0.102631, -0.293969, -0.273300,  0.135299,  0.351851,  0.068975, -0.311806,
+ -0.250000,  0.166664,  0.346760,  0.034654, -0.326641, -0.224292,  0.196424,  0.338330,
+  0.250000,  0.034654, -0.346760, -0.102631,  0.326641,  0.166664, -0.293969, -0.224292,
+  0.250000,  0.273300, -0.196424, -0.311806,  0.135299,  0.338330, -0.068975, -0.351851,
+  0.250000, -0.034654, -0.346760,  0.102631,  0.326641, -0.166664, -0.293969,  0.224292,
+  0.250000, -0.273300, -0.196424,  0.311806,  0.135299, -0.338330, -0.068975,  0.351851,
+  0.250000, -0.102631, -0.293969,  0.273300,  0.135299, -0.351851,  0.068975,  0.311806,
+ -0.250000, -0.166664,  0.346760, -0.034654, -0.326641,  0.224292,  0.196424, -0.338330,
+  0.250000, -0.166664, -0.196424,  0.351851, -0.135299, -0.224292,  0.346760, -0.102631,
+ -0.250000,  0.338330, -0.068975, -0.273300,  0.326641, -0.034654, -0.293969,  0.311806,
+  0.250000, -0.224292, -0.068975,  0.311806, -0.326641,  0.102631,  0.196424, -0.351851,
+  0.250000,  0.034654, -0.293969,  0.338330, -0.135299, -0.166664,  0.346760, -0.273300,
+  0.250000, -0.273300,  0.068975,  0.166664, -0.326641,  0.338330, -0.196424, -0.034654,
+  0.250000, -0.351851,  0.293969, -0.102631, -0.135299,  0.311806, -0.346760,  0.224292,
+  0.250000, -0.311806,  0.196424, -0.034654, -0.135299,  0.273300, -0.346760,  0.338330,
+ -0.250000,  0.102631,  0.068975, -0.224292,  0.326641, -0.351851,  0.293969, -0.166664,
+  0.250000, -0.338330,  0.293969, -0.224292,  0.135299, -0.034654, -0.068975,  0.166664,
+ -0.250000,  0.311806, -0.346760,  0.351851, -0.326641,  0.273300, -0.196424,  0.102631,
+  0.250000, -0.351851,  0.346760, -0.338330,  0.326641, -0.311806,  0.293969, -0.273300,
+  0.250000, -0.224292,  0.196424, -0.166664,  0.135299, -0.102631,  0.068975, -0.034654
+};
+
+float iadst_16[256] = {
+  0.033094,  0.098087,  0.159534,  0.215215,  0.263118,  0.301511,  0.329007,  0.344612,
+  0.347761,  0.338341,  0.316693,  0.283599,  0.240255,  0.188227,  0.129396,  0.065889,
+  0.065889,  0.188227,  0.283599,  0.338341,  0.344612,  0.301511,  0.215215,  0.098087,
+ -0.033094, -0.159534, -0.263118, -0.329007, -0.347761, -0.316693, -0.240255, -0.129396,
+  0.098087,  0.263118,  0.344612,  0.316693,  0.188227,  0.000000, -0.188227, -0.316693,
+ -0.344612, -0.263118, -0.098087,  0.098087,  0.263118,  0.344612,  0.316693,  0.188227,
+  0.129396,  0.316693,  0.329007,  0.159534, -0.098087, -0.301511, -0.338341, -0.188227,
+  0.065889,  0.283599,  0.344612,  0.215215, -0.033094, -0.263118, -0.347761, -0.240255,
+  0.159534,  0.344612,  0.240255, -0.065889, -0.316693, -0.301511, -0.033094,  0.263118,
+  0.338341,  0.129396, -0.188227, -0.347761, -0.215215,  0.098087,  0.329007,  0.283599,
+  0.188227,  0.344612,  0.098087, -0.263118, -0.316693, -0.000000,  0.316693,  0.263118,
+ -0.098087, -0.344612, -0.188227,  0.188227,  0.344612,  0.098087, -0.263118, -0.316693,
+  0.215215,  0.316693, -0.065889, -0.347761, -0.098087,  0.301511,  0.240255, -0.188227,
+ -0.329007,  0.033094,  0.344612,  0.129396, -0.283599, -0.263118,  0.159534,  0.338341,
+  0.240255,  0.263118, -0.215215, -0.283599,  0.188227,  0.301511, -0.159534, -0.316693,
+  0.129396,  0.329007, -0.098087, -0.338341,  0.065889,  0.344612, -0.033094, -0.347761,
+  0.263118,  0.188227, -0.316693, -0.098087,  0.344612,  0.000000, -0.344612,  0.098087,
+  0.316693, -0.188227, -0.263118,  0.263118,  0.188227, -0.316693, -0.098087,  0.344612,
+  0.283599,  0.098087, -0.347761,  0.129396,  0.263118, -0.301511, -0.065889,  0.344612,
+ -0.159534, -0.240255,  0.316693,  0.033094, -0.338341,  0.188227,  0.215215, -0.329007,
+  0.301511,  0.000000, -0.301511,  0.301511,  0.000000, -0.301511,  0.301511,  0.000000,
+ -0.301511,  0.301511,  0.000000, -0.301511,  0.301511,  0.000000, -0.301511,  0.301511,
+  0.316693, -0.098087, -0.188227,  0.344612, -0.263118, -0.000000,  0.263118, -0.344612,
+  0.188227,  0.098087, -0.316693,  0.316693, -0.098087, -0.188227,  0.344612, -0.263118,
+  0.329007, -0.188227, -0.033094,  0.240255, -0.344612,  0.301511, -0.129396, -0.098087,
+  0.283599, -0.347761,  0.263118, -0.065889, -0.159534,  0.316693, -0.338341,  0.215215,
+  0.338341, -0.263118,  0.129396,  0.033094, -0.188227,  0.301511, -0.347761,  0.316693,
+ -0.215215,  0.065889,  0.098087, -0.240255,  0.329007, -0.344612,  0.283599, -0.159534,
+  0.344612, -0.316693,  0.263118, -0.188227,  0.098087,  0.000000, -0.098087,  0.188227,
+ -0.263118,  0.316693, -0.344612,  0.344612, -0.316693,  0.263118, -0.188227,  0.098087,
+  0.347761, -0.344612,  0.338341, -0.329007,  0.316693, -0.301511,  0.283599, -0.263118,
+  0.240255, -0.215215,  0.188227, -0.159534,  0.129396, -0.098087,  0.065889, -0.033094
+};
+#endif
 
+#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
 void vp8_ihtllm_c(short *input, short *output, int pitch,
                   TX_TYPE tx_type, int tx_dim) {
   int i, j, k;
-  float bufa[64], bufb[64]; // buffers are for floating-point test purpose
+  float bufa[256], bufb[256]; // buffers are for floating-point test purpose
                             // the implementation could be simplified in
                             // conjunction with integer transform
 
@@ -126,11 +200,13 @@ void vp8_ihtllm_c(short *input, short *output, int pitch,
   switch(tx_type) {
     case ADST_ADST :
     case ADST_DCT  :
-      ptv = (tx_dim == 4) ? &iadst_4[0] : &iadst_8[0];
+      ptv = (tx_dim == 4) ? &iadst_4[0] :
+                            ((tx_dim == 8) ? &iadst_8[0] : &iadst_16[0]);
       break;
 
     default :
-      ptv = (tx_dim == 4) ? &idct_4[0] : &idct_8[0];
+      ptv = (tx_dim == 4) ? &idct_4[0] :
+                            ((tx_dim == 8) ? &idct_8[0] : &idct_16[0]);
       break;
   }
 
@@ -155,11 +231,13 @@ void vp8_ihtllm_c(short *input, short *output, int pitch,
   switch(tx_type) {
     case ADST_ADST :
     case  DCT_ADST :
-      pth = (tx_dim == 4) ? &iadst_4[0] : &iadst_8[0];
+      pth = (tx_dim == 4) ? &iadst_4[0] :
+                            ((tx_dim == 8) ? &iadst_8[0] : &iadst_16[0]);
       break;
 
     default :
-      pth = (tx_dim == 4) ? &idct_4[0] : &idct_8[0];
+      pth = (tx_dim == 4) ? &idct_4[0] :
+                            ((tx_dim == 8) ? &idct_8[0] : &idct_16[0]);
       break;
   }
 
@@ -178,11 +256,13 @@ void vp8_ihtllm_c(short *input, short *output, int pitch,
     switch(tx_type) {
       case ADST_ADST :
       case  DCT_ADST :
-        pth = (tx_dim == 4) ? &iadst_4[0] : &iadst_8[0];
+        pth = (tx_dim == 4) ? &iadst_4[0] :
+                              ((tx_dim == 8) ? &iadst_8[0] : &iadst_16[0]);
         break;
 
       default :
-        pth = (tx_dim == 4) ? &idct_4[0] : &idct_8[0];
+        pth = (tx_dim == 4) ? &idct_4[0] :
+                              ((tx_dim == 8) ? &idct_8[0] : &idct_16[0]);
         break;
     }
   }
@@ -692,7 +772,7 @@ void vp8_short_ihaar2x2_c(short *input, short *output, int pitch) {
 }
 
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 #if 0
 // Keep a really bad float version as reference for now.
 void vp8_short_idct16x16_c(short *input, short *output, int pitch) {
diff --git a/vp8/common/invtrans.c b/vp8/common/invtrans.c
index 1d63f465a4..9fc94eab8b 100644
--- a/vp8/common/invtrans.c
+++ b/vp8/common/invtrans.c
@@ -171,7 +171,7 @@ void vp8_inverse_transform_mb_8x8(const vp8_idct_rtcd_vtable_t *rtcd,
 
 }
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 void vp8_inverse_transform_b_16x16(const vp8_idct_rtcd_vtable_t *rtcd,
                                    short *input_dqcoeff,
                                    short *output_coeff, int pitch) {
diff --git a/vp8/common/invtrans.h b/vp8/common/invtrans.h
index 76258d435d..2097c368c3 100644
--- a/vp8/common/invtrans.h
+++ b/vp8/common/invtrans.h
@@ -30,7 +30,7 @@ extern void vp8_inverse_transform_mb_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MAC
 extern void vp8_inverse_transform_mby_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *xd);
 extern void vp8_inverse_transform_mbuv_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *xd);
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 extern void vp8_inverse_transform_b_16x16(const vp8_idct_rtcd_vtable_t *rtcd,
                                           short *input_dqcoeff, short *output_coeff,
                                           int pitch);
diff --git a/vp8/common/loopfilter.c b/vp8/common/loopfilter.c
index d9c4b54be9..727326cba0 100644
--- a/vp8/common/loopfilter.c
+++ b/vp8/common/loopfilter.c
@@ -336,7 +336,7 @@ void vp8_loop_filter_frame
             (y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
 
           if (!skip_lf
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
               && tx_type != TX_16X16
 #endif
               ) {
@@ -361,7 +361,7 @@ void vp8_loop_filter_frame
             (y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
 
           if (!skip_lf
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
               && tx_type != TX_16X16
 #endif
               ) {
@@ -479,7 +479,7 @@ void vp8_loop_filter_frame_yonly
             (y_ptr, 0, 0, post->y_stride, 0, &lfi);
 
           if (!skip_lf
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
               && tx_type != TX_16X16
 #endif
               ) {
@@ -497,7 +497,7 @@ void vp8_loop_filter_frame_yonly
             (y_ptr, 0, 0, post->y_stride, 0, &lfi);
 
           if (!skip_lf
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
               && tx_type != TX_16X16
 #endif
               ) {
diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h
index b7a543220a..b76c4cc220 100644
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -52,7 +52,7 @@ typedef struct frame_contexts {
   vp8_prob mbsplit_prob [VP8_NUMMBSPLITS - 1];
   vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
   vp8_prob coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   vp8_prob coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
 #endif
   MV_CONTEXT mvc[2];
@@ -76,7 +76,7 @@ typedef struct frame_contexts {
       [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
   vp8_prob pre_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS]
       [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   vp8_prob pre_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS]
       [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
 #endif
@@ -84,7 +84,7 @@ typedef struct frame_contexts {
       [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
   unsigned int coef_counts_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS]
       [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   unsigned int coef_counts_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS]
       [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
 #endif
diff --git a/vp8/common/reconinter.c b/vp8/common/reconinter.c
index 0212c92c77..70d3832b2b 100644
--- a/vp8/common/reconinter.c
+++ b/vp8/common/reconinter.c
@@ -750,7 +750,6 @@ void vp8_build_1st_inter16x16_predictors_mbuv(MACROBLOCKD *xd,
 }
 
 
-
 void vp8_build_1st_inter16x16_predictors_mb(MACROBLOCKD *xd,
                                             unsigned char *dst_y,
                                             unsigned char *dst_u,
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 1ec4cb17a9..d932683962 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -125,7 +125,7 @@ void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd) {
     xd->block[i].dequant = pc->Y1dequant[QIndex];
   }
 
-#if CONFIG_HYBRIDTRANSFORM
+#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16
   xd->q_index = QIndex;
 #endif
 
@@ -234,17 +234,21 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
   int orig_skip_flag = xd->mode_info_context->mbmi.mb_skip_coeff;
 #endif
 
-#if CONFIG_HYBRIDTRANSFORM
+#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
   int QIndex;
   int active_ht;
 #endif
 
+#if CONFIG_HYBRIDTRANSFORM16X16
+  int active_ht16;
+#endif
+
   // re-initialize macroblock dequantizer before detokenization
   if (xd->segmentation_enabled)
     mb_init_dequantizer(pbi, xd);
 
   if (pbi->common.frame_type == KEY_FRAME) {
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     if (xd->mode_info_context->mbmi.mode <= TM_PRED ||
         xd->mode_info_context->mbmi.mode == NEWMV ||
         xd->mode_info_context->mbmi.mode == ZEROMV ||
@@ -263,7 +267,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
     else
       xd->mode_info_context->mbmi.txfm_size = TX_4X4;
   } else {
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     if (xd->mode_info_context->mbmi.mode <= TM_PRED ||
         xd->mode_info_context->mbmi.mode == NEWMV ||
         xd->mode_info_context->mbmi.mode == ZEROMV ||
@@ -316,7 +320,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
       xd->block[i].eob = 0;
       xd->eobs[i] = 0;
     }
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     if (tx_type == TX_16X16)
       eobtotal = vp8_decode_mb_tokens_16x16(pbi, xd);
     else
@@ -377,7 +381,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
 //  if (xd->segmentation_enabled)
 //    mb_init_dequantizer(pbi, xd);
 
-#if CONFIG_HYBRIDTRANSFORM
+#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
   // parse transform types for intra 4x4 mode
   QIndex = xd->q_index;
   active_ht = (QIndex < ACTIVE_HT);
@@ -391,6 +395,10 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
   }
 #endif
 
+#if CONFIG_HYBRIDTRANSFORM16X16
+  active_ht16 = (QIndex < ACTIVE_HT16);
+#endif
+
   /* do prediction */
   if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
 #if CONFIG_SUPERBLOCKS
@@ -537,11 +545,28 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
   } else {
     BLOCKD *b = &xd->block[24];
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     if (tx_type == TX_16X16) {
+#if CONFIG_HYBRIDTRANSFORM16X16
+      if (mode < I8X8_PRED && active_ht16) {
+        BLOCKD *bd = &xd->block[0];
+        TX_TYPE txfm;
+        txfm_map(bd, pred_mode_conv(mode));
+        txfm = bd->bmi.as_mode.tx_type;
+
+        vp8_ht_dequant_idct_add_16x16_c(txfm, xd->qcoeff,
+                                        xd->block[0].dequant, xd->predictor,
+                                        xd->dst.y_buffer, 16, xd->dst.y_stride);
+      } else {
+        vp8_dequant_idct_add_16x16_c(xd->qcoeff, xd->block[0].dequant,
+                                     xd->predictor, xd->dst.y_buffer,
+                                     16, xd->dst.y_stride);
+      }
+#else
       vp8_dequant_idct_add_16x16_c(xd->qcoeff, xd->block[0].dequant,
                                    xd->predictor, xd->dst.y_buffer,
                                    16, xd->dst.y_stride);
+#endif
     }
     else
 #endif
@@ -641,7 +666,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
     if (!xd->mode_info_context->mbmi.encoded_as_sb) {
 #endif
       if (tx_type == TX_8X8
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
       || tx_type == TX_16X16
 #endif
       )
@@ -1047,7 +1072,7 @@ static void read_coef_probs(VP8D_COMP *pbi) {
         }
   }
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   // 16x16
   if (vp8_read_bit(bc)) {
     // read coef probability tree
@@ -1430,7 +1455,7 @@ int vp8_decode_frame(VP8D_COMP *pbi) {
 
   vp8_copy(pbi->common.fc.pre_coef_probs, pbi->common.fc.coef_probs);
   vp8_copy(pbi->common.fc.pre_coef_probs_8x8, pbi->common.fc.coef_probs_8x8);
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   vp8_copy(pbi->common.fc.pre_coef_probs_16x16, pbi->common.fc.coef_probs_16x16);
 #endif
   vp8_copy(pbi->common.fc.pre_ymode_prob, pbi->common.fc.ymode_prob);
@@ -1443,7 +1468,7 @@ int vp8_decode_frame(VP8D_COMP *pbi) {
   vp8_copy(pbi->common.fc.pre_mvc_hp, pbi->common.fc.mvc_hp);
   vp8_zero(pbi->common.fc.coef_counts);
   vp8_zero(pbi->common.fc.coef_counts_8x8);
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   vp8_zero(pbi->common.fc.coef_counts_16x16);
 #endif
   vp8_zero(pbi->common.fc.ymode_counts);
diff --git a/vp8/decoder/dequantize.c b/vp8/decoder/dequantize.c
index 6164c44d53..95a3ae2721 100644
--- a/vp8/decoder/dequantize.c
+++ b/vp8/decoder/dequantize.c
@@ -468,7 +468,48 @@ void vp8_dequant_dc_idct_add_8x8_c(short *input, short *dq, unsigned char *pred,
 #endif
 }
 
-#if CONFIG_TX16X16
+#if CONFIG_HYBRIDTRANSFORM16X16
+void vp8_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, short *input, short *dq,
+                                     unsigned char *pred, unsigned char *dest,
+                                     int pitch, int stride) {
+  short output[256];
+  short *diff_ptr = output;
+  int r, c, i;
+
+  input[0]= input[0] * dq[0];
+
+  // recover quantizer for 4 4x4 blocks
+  for (i = 1; i < 256; i++)
+    input[i] = input[i] * dq[1];
+
+  // inverse hybrid transform
+  vp8_ihtllm_c(input, output, 32, tx_type, 16);
+
+  // the idct halves ( >> 1) the pitch
+  // vp8_short_idct16x16_c(input, output, 32);
+
+  vpx_memset(input, 0, 512);
+
+  for (r = 0; r < 16; r++) {
+    for (c = 0; c < 16; c++) {
+      int a = diff_ptr[c] + pred[c];
+
+      if (a < 0)
+        a = 0;
+      else if (a > 255)
+        a = 255;
+
+      dest[c] = (unsigned char) a;
+    }
+
+    dest += stride;
+    diff_ptr += 16;
+    pred += pitch;
+  }
+}
+#endif
+
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 void vp8_dequant_idct_add_16x16_c(short *input, short *dq, unsigned char *pred,
                                   unsigned char *dest, int pitch, int stride) {
   short output[256];
diff --git a/vp8/decoder/dequantize.h b/vp8/decoder/dequantize.h
index 2326e467d0..e739ff8427 100644
--- a/vp8/decoder/dequantize.h
+++ b/vp8/decoder/dequantize.h
@@ -145,7 +145,7 @@ extern prototype_dequant_idct_add_y_block_8x8(vp8_dequant_idct_add_y_block_8x8);
 #endif
 extern prototype_dequant_idct_add_uv_block_8x8(vp8_dequant_idct_add_uv_block_8x8);
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 #ifndef vp8_dequant_idct_add_16x16
 #define vp8_dequant_idct_add_16x16 vp8_dequant_idct_add_16x16_c
 #endif
@@ -184,7 +184,7 @@ typedef struct {
   vp8_dequant_dc_idct_add_y_block_fn_t_8x8 dc_idct_add_y_block_8x8;
   vp8_dequant_idct_add_y_block_fn_t_8x8    idct_add_y_block_8x8;
   vp8_dequant_idct_add_uv_block_fn_t_8x8   idct_add_uv_block_8x8;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   vp8_dequant_idct_add_fn_t            idct_add_16x16;
 #endif
 } vp8_dequant_rtcd_vtable_t;
@@ -201,6 +201,12 @@ void vp8_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, short *input, short *dq,
                                    int pitch, int stride);
 #endif
 
+#if CONFIG_HYBRIDTRANSFORM16X16
+void vp8_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, short *input, short *dq,
+                                     unsigned char *pred, unsigned char *dest,
+                                     int pitch, int stride);
+#endif
+
 #if CONFIG_SUPERBLOCKS
 void vp8_dequant_dc_idct_add_y_block_8x8_inplace_c(short *q, short *dq,
                                                    unsigned char *dst,
diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c
index a6c8370843..e9ea0c10b2 100644
--- a/vp8/decoder/detokenize.c
+++ b/vp8/decoder/detokenize.c
@@ -39,7 +39,7 @@ DECLARE_ALIGNED(16, const int, coef_bands_x_8x8[64]) = {
   7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X,
 };
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 DECLARE_ALIGNED(16, const int, coef_bands_x_16x16[256]) = {
   0 * OCB_X, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X, 5 * OCB_X, 4 * OCB_X, 4 * OCB_X, 5 * OCB_X, 5 * OCB_X, 3 * OCB_X, 6 * OCB_X, 3 * OCB_X, 5 * OCB_X, 4 * OCB_X, 6 * OCB_X, 6 * OCB_X,
   6 * OCB_X, 5 * OCB_X, 5 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X,
@@ -105,7 +105,7 @@ void vp8_reset_mb_tokens_context(MACROBLOCKD *xd) {
   if ((xd->mode_info_context->mbmi.mode != B_PRED &&
       xd->mode_info_context->mbmi.mode != I8X8_PRED &&
       xd->mode_info_context->mbmi.mode != SPLITMV)
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
       || xd->mode_info_context->mbmi.txfm_size == TX_16X16
 #endif
       ) {
@@ -225,7 +225,7 @@ void static count_tokens_8x8(INT16 *qcoeff_ptr, int block, int type,
   }
 }
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 void static count_tokens_16x16(INT16 *qcoeff_ptr, int block, int type,
                                ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                int eob, int seg_eob, FRAME_CONTEXT *fc) {
@@ -302,7 +302,7 @@ static int vp8_decode_coefs(VP8D_COMP *dx, const MACROBLOCKD *xd,
     case TX_8X8:
       coef_probs = fc->coef_probs_8x8[type][0][0];
       break;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     case TX_16X16:
       coef_probs = fc->coef_probs_16x16[type][0][0];
       break;
@@ -398,14 +398,14 @@ SKIP_START:
   }
   else if (block_type == TX_8X8)
     count_tokens_8x8(qcoeff_ptr, i, type, a, l, c, seg_eob, fc);
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   else
     count_tokens_16x16(qcoeff_ptr, i, type, a, l, c, seg_eob, fc);
 #endif
   return c;
 }
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 int vp8_decode_mb_tokens_16x16(VP8D_COMP *pbi, MACROBLOCKD *xd) {
   ENTROPY_CONTEXT* const A = (ENTROPY_CONTEXT *)xd->above_context;
   ENTROPY_CONTEXT* const L = (ENTROPY_CONTEXT *)xd->left_context;
@@ -417,6 +417,7 @@ int vp8_decode_mb_tokens_16x16(VP8D_COMP *pbi, MACROBLOCKD *xd) {
   INT16 *qcoeff_ptr = &xd->qcoeff[0];
 
   type = PLANE_TYPE_Y_WITH_DC;
+
   if (seg_active)
       seg_eob = get_segdata(xd, segment_id, SEG_LVL_EOB);
   else
diff --git a/vp8/decoder/detokenize.h b/vp8/decoder/detokenize.h
index 918706f657..5fb1a7f3e5 100644
--- a/vp8/decoder/detokenize.h
+++ b/vp8/decoder/detokenize.h
@@ -17,7 +17,7 @@
 void vp8_reset_mb_tokens_context(MACROBLOCKD *xd);
 int vp8_decode_mb_tokens(VP8D_COMP *, MACROBLOCKD *);
 int vp8_decode_mb_tokens_8x8(VP8D_COMP *, MACROBLOCKD *);
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 int vp8_decode_mb_tokens_16x16(VP8D_COMP *, MACROBLOCKD *);
 #endif
 
diff --git a/vp8/decoder/generic/dsystemdependent.c b/vp8/decoder/generic/dsystemdependent.c
index 15b809e339..ccd6764acf 100644
--- a/vp8/decoder/generic/dsystemdependent.c
+++ b/vp8/decoder/generic/dsystemdependent.c
@@ -22,7 +22,7 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi) {
   pbi->mb.rtcd                     = &pbi->common.rtcd;
   pbi->dequant.block_2x2           = vp8_dequantize_b_2x2_c;
   pbi->dequant.idct_add_8x8        = vp8_dequant_idct_add_8x8_c;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   pbi->dequant.idct_add_16x16      = vp8_dequant_idct_add_16x16_c;
 #endif
   pbi->dequant.dc_idct_add_8x8     = vp8_dequant_dc_idct_add_8x8_c;
diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h
index b2a643bc53..0cb00a4c56 100644
--- a/vp8/decoder/onyxd_int.h
+++ b/vp8/decoder/onyxd_int.h
@@ -54,7 +54,7 @@ typedef struct {
 
   vp8_prob const *coef_probs[BLOCK_TYPES];
   vp8_prob const *coef_probs_8x8[BLOCK_TYPES_8X8];
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   vp8_prob const *coef_probs_16X16[BLOCK_TYPES_16X16];
 #endif
 
diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c
index 7e667aa635..7d94bec269 100644
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -46,7 +46,7 @@ unsigned int tree_update_hist_8x8 [BLOCK_TYPES_8X8]
                                   [COEF_BANDS]
                                   [PREV_COEF_CONTEXTS]
                                   [ENTROPY_NODES] [2];
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 unsigned int tree_update_hist_16x16 [BLOCK_TYPES_16X16]
                                     [COEF_BANDS]
                                     [PREV_COEF_CONTEXTS]
@@ -1422,7 +1422,7 @@ void build_coeff_contexts(VP8_COMP *cpi) {
     }
   }
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   //16x16
   for (i = 0; i < BLOCK_TYPES_16X16; ++i) {
     for (j = 0; j < COEF_BANDS; ++j) {
@@ -1794,7 +1794,7 @@ static void update_coef_probs(VP8_COMP *cpi) {
     }
   }
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   // 16x16
   /* dry run to see if update is necessary */
   update[0] = update[1] = 0;
@@ -2304,7 +2304,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size)
 
   vp8_copy(cpi->common.fc.pre_coef_probs, cpi->common.fc.coef_probs);
   vp8_copy(cpi->common.fc.pre_coef_probs_8x8, cpi->common.fc.coef_probs_8x8);
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   vp8_copy(cpi->common.fc.pre_coef_probs_16x16, cpi->common.fc.coef_probs_16x16);
 #endif
   vp8_copy(cpi->common.fc.pre_ymode_prob, cpi->common.fc.ymode_prob);
@@ -2440,7 +2440,7 @@ void print_tree_update_probs() {
     fprintf(f, "  },\n");
   }
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   fprintf(f, "const vp8_prob\n"
           "vp8_coef_update_probs_16x16[BLOCK_TYPES_16X16]\n"
           "                           [COEF_BANDS]\n"
@@ -2474,7 +2474,7 @@ void print_tree_update_probs() {
   f = fopen("treeupdate.bin", "wb");
   fwrite(tree_update_hist, sizeof(tree_update_hist), 1, f);
   fwrite(tree_update_hist_8x8, sizeof(tree_update_hist_8x8), 1, f);
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   fwrite(tree_update_hist_16x16, sizeof(tree_update_hist_16x16), 1, f);
 #endif
   fclose(f);
diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h
index d73af4faaf..77980567d0 100644
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -35,12 +35,12 @@ typedef struct {
   unsigned char *quant_shift;
   short *zbin;
   short *zbin_8x8;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   short *zbin_16x16;
 #endif
   short *zrun_zbin_boost;
   short *zrun_zbin_boost_8x8;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   short *zrun_zbin_boost_16x16;
 #endif
   short *round;
@@ -55,7 +55,7 @@ typedef struct {
 
   int eob_max_offset;
   int eob_max_offset_8x8;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   int eob_max_offset_16x16;
 #endif
 } BLOCK;
@@ -173,11 +173,11 @@ typedef struct {
   void (*quantize_b)(BLOCK *b, BLOCKD *d);
   void (*quantize_b_pair)(BLOCK *b1, BLOCK *b2, BLOCKD *d0, BLOCKD *d1);
   void (*vp8_short_fdct8x8)(short *input, short *output, int pitch);
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   void (*vp8_short_fdct16x16)(short *input, short *output, int pitch);
 #endif
   void (*short_fhaar2x2)(short *input, short *output, int pitch);
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   void (*quantize_b_16x16)(BLOCK *b, BLOCKD *d);
 #endif
   void (*quantize_b_8x8)(BLOCK *b, BLOCKD *d);
diff --git a/vp8/encoder/dct.c b/vp8/encoder/dct.c
index 5f46a98046..f5c666e245 100644
--- a/vp8/encoder/dct.c
+++ b/vp8/encoder/dct.c
@@ -13,7 +13,7 @@
 #include "vpx_ports/config.h"
 #include "vp8/common/idct.h"
 
-#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8
+#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16
 
 #include "vp8/common/blockd.h"
 
@@ -72,6 +72,78 @@ float adst_8[64] = {
 };
 #endif
 
+#if CONFIG_HYBRIDTRANSFORM16X16 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8
+float dct_16[256] = {
+  0.250000,  0.250000,  0.250000,  0.250000,  0.250000,  0.250000,  0.250000,  0.250000,
+  0.250000,  0.250000,  0.250000,  0.250000,  0.250000,  0.250000,  0.250000,  0.250000,
+  0.351851,  0.338330,  0.311806,  0.273300,  0.224292,  0.166664,  0.102631,  0.034654,
+ -0.034654, -0.102631, -0.166664, -0.224292, -0.273300, -0.311806, -0.338330, -0.351851,
+  0.346760,  0.293969,  0.196424,  0.068975, -0.068975, -0.196424, -0.293969, -0.346760,
+ -0.346760, -0.293969, -0.196424, -0.068975,  0.068975,  0.196424,  0.293969,  0.346760,
+  0.338330,  0.224292,  0.034654, -0.166664, -0.311806, -0.351851, -0.273300, -0.102631,
+  0.102631,  0.273300,  0.351851,  0.311806,  0.166664, -0.034654, -0.224292, -0.338330,
+  0.326641,  0.135299, -0.135299, -0.326641, -0.326641, -0.135299,  0.135299,  0.326641,
+  0.326641,  0.135299, -0.135299, -0.326641, -0.326641, -0.135299,  0.135299,  0.326641,
+  0.311806,  0.034654, -0.273300, -0.338330, -0.102631,  0.224292,  0.351851,  0.166664,
+ -0.166664, -0.351851, -0.224292,  0.102631,  0.338330,  0.273300, -0.034654, -0.311806,
+  0.293969, -0.068975, -0.346760, -0.196424,  0.196424,  0.346760,  0.068975, -0.293969,
+ -0.293969,  0.068975,  0.346760,  0.196424, -0.196424, -0.346760, -0.068975,  0.293969,
+  0.273300, -0.166664, -0.338330,  0.034654,  0.351851,  0.102631, -0.311806, -0.224292,
+  0.224292,  0.311806, -0.102631, -0.351851, -0.034654,  0.338330,  0.166664, -0.273300,
+  0.250000, -0.250000, -0.250000,  0.250000,  0.250000, -0.250000, -0.250000,  0.250000,
+  0.250000, -0.250000, -0.250000,  0.250000,  0.250000, -0.250000, -0.250000,  0.250000,
+  0.224292, -0.311806, -0.102631,  0.351851, -0.034654, -0.338330,  0.166664,  0.273300,
+ -0.273300, -0.166664,  0.338330,  0.034654, -0.351851,  0.102631,  0.311806, -0.224292,
+  0.196424, -0.346760,  0.068975,  0.293969, -0.293969, -0.068975,  0.346760, -0.196424,
+ -0.196424,  0.346760, -0.068975, -0.293969,  0.293969,  0.068975, -0.346760,  0.196424,
+  0.166664, -0.351851,  0.224292,  0.102631, -0.338330,  0.273300,  0.034654, -0.311806,
+  0.311806, -0.034654, -0.273300,  0.338330, -0.102631, -0.224292,  0.351851, -0.166664,
+  0.135299, -0.326641,  0.326641, -0.135299, -0.135299,  0.326641, -0.326641,  0.135299,
+  0.135299, -0.326641,  0.326641, -0.135299, -0.135299,  0.326641, -0.326641,  0.135299,
+  0.102631, -0.273300,  0.351851, -0.311806,  0.166664,  0.034654, -0.224292,  0.338330,
+ -0.338330,  0.224292, -0.034654, -0.166664,  0.311806, -0.351851,  0.273300, -0.102631,
+  0.068975, -0.196424,  0.293969, -0.346760,  0.346760, -0.293969,  0.196424, -0.068975,
+ -0.068975,  0.196424, -0.293969,  0.346760, -0.346760,  0.293969, -0.196424,  0.068975,
+  0.034654, -0.102631,  0.166664, -0.224292,  0.273300, -0.311806,  0.338330, -0.351851,
+  0.351851, -0.338330,  0.311806, -0.273300,  0.224292, -0.166664,  0.102631, -0.034654
+};
+
+float adst_16[256] = {
+  0.033094,  0.065889,  0.098087,  0.129396,  0.159534,  0.188227,  0.215215,  0.240255,
+  0.263118,  0.283599,  0.301511,  0.316693,  0.329007,  0.338341,  0.344612,  0.347761,
+  0.098087,  0.188227,  0.263118,  0.316693,  0.344612,  0.344612,  0.316693,  0.263118,
+  0.188227,  0.098087,  0.000000, -0.098087, -0.188227, -0.263118, -0.316693, -0.344612,
+  0.159534,  0.283599,  0.344612,  0.329007,  0.240255,  0.098087, -0.065889, -0.215215,
+ -0.316693, -0.347761, -0.301511, -0.188227, -0.033094,  0.129396,  0.263118,  0.338341,
+  0.215215,  0.338341,  0.316693,  0.159534, -0.065889, -0.263118, -0.347761, -0.283599,
+ -0.098087,  0.129396,  0.301511,  0.344612,  0.240255,  0.033094, -0.188227, -0.329007,
+  0.263118,  0.344612,  0.188227, -0.098087, -0.316693, -0.316693, -0.098087,  0.188227,
+  0.344612,  0.263118,  0.000000, -0.263118, -0.344612, -0.188227,  0.098087,  0.316693,
+  0.301511,  0.301511,  0.000000, -0.301511, -0.301511, -0.000000,  0.301511,  0.301511,
+  0.000000, -0.301511, -0.301511, -0.000000,  0.301511,  0.301511,  0.000000, -0.301511,
+  0.329007,  0.215215, -0.188227, -0.338341, -0.033094,  0.316693,  0.240255, -0.159534,
+ -0.344612, -0.065889,  0.301511,  0.263118, -0.129396, -0.347761, -0.098087,  0.283599,
+  0.344612,  0.098087, -0.316693, -0.188227,  0.263118,  0.263118, -0.188227, -0.316693,
+  0.098087,  0.344612,  0.000000, -0.344612, -0.098087,  0.316693,  0.188227, -0.263118,
+  0.347761, -0.033094, -0.344612,  0.065889,  0.338341, -0.098087, -0.329007,  0.129396,
+  0.316693, -0.159534, -0.301511,  0.188227,  0.283599, -0.215215, -0.263118,  0.240255,
+  0.338341, -0.159534, -0.263118,  0.283599,  0.129396, -0.344612,  0.033094,  0.329007,
+ -0.188227, -0.240255,  0.301511,  0.098087, -0.347761,  0.065889,  0.316693, -0.215215,
+  0.316693, -0.263118, -0.098087,  0.344612, -0.188227, -0.188227,  0.344612, -0.098087,
+ -0.263118,  0.316693,  0.000000, -0.316693,  0.263118,  0.098087, -0.344612,  0.188227,
+  0.283599, -0.329007,  0.098087,  0.215215, -0.347761,  0.188227,  0.129396, -0.338341,
+  0.263118,  0.033094, -0.301511,  0.316693, -0.065889, -0.240255,  0.344612, -0.159534,
+  0.240255, -0.347761,  0.263118, -0.033094, -0.215215,  0.344612, -0.283599,  0.065889,
+  0.188227, -0.338341,  0.301511, -0.098087, -0.159534,  0.329007, -0.316693,  0.129396,
+  0.188227, -0.316693,  0.344612, -0.263118,  0.098087,  0.098087, -0.263118,  0.344612,
+ -0.316693,  0.188227,  0.000000, -0.188227,  0.316693, -0.344612,  0.263118, -0.098087,
+  0.129396, -0.240255,  0.316693, -0.347761,  0.329007, -0.263118,  0.159534, -0.033094,
+ -0.098087,  0.215215, -0.301511,  0.344612, -0.338341,  0.283599, -0.188227,  0.065889,
+  0.065889, -0.129396,  0.188227, -0.240255,  0.283599, -0.316693,  0.338341, -0.347761,
+  0.344612, -0.329007,  0.301511, -0.263118,  0.215215, -0.159534,  0.098087, -0.033094
+};
+#endif
+
 static const int xC1S7 = 16069;
 static const int xC2S6 = 15137;
 static const int xC3S5 = 13623;
@@ -327,11 +399,11 @@ void vp8_short_fhaar2x2_c(short *input, short *output, int pitch) { // pitch = 8
 
 }
 
-#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM
+#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
 void vp8_fht_c(short *input, short *output, int pitch,
                TX_TYPE tx_type, int tx_dim) {
   int i, j, k;
-  float bufa[64], bufb[64]; // buffers are for floating-point test purpose
+  float bufa[256], bufb[256]; // buffers are for floating-point test purpose
                              // the implementation could be simplified in
                              // conjunction with integer transform
   short *ip = input;
@@ -359,11 +431,13 @@ void vp8_fht_c(short *input, short *output, int pitch,
   switch(tx_type) {
     case ADST_ADST :
     case ADST_DCT  :
-      ptv = (tx_dim == 4) ? &adst_4[0] : &adst_8[0];
+      ptv = (tx_dim == 4) ? &adst_4[0] :
+                            ((tx_dim == 8) ? &adst_8[0] : &adst_16[0]);
       break;
 
     default :
-      ptv = (tx_dim == 4) ? &dct_4[0] : &dct_8[0];
+      ptv = (tx_dim == 4) ? &dct_4[0] :
+                            ((tx_dim == 8) ? &dct_8[0] : &dct_16[0]);
       break;
   }
 
@@ -387,11 +461,13 @@ void vp8_fht_c(short *input, short *output, int pitch,
   switch(tx_type) {
     case ADST_ADST :
     case  DCT_ADST :
-      pth = (tx_dim == 4) ? &adst_4[0] : &adst_8[0];
+      pth = (tx_dim == 4) ? &adst_4[0] :
+                            ((tx_dim == 8) ? &adst_8[0] : &adst_16[0]);
       break;
 
     default :
-      pth = (tx_dim == 4) ? &dct_4[0] : &dct_8[0];
+      pth = (tx_dim == 4) ? &dct_4[0] :
+                            ((tx_dim == 8) ? &dct_8[0] : &dct_16[0]);
       break;
   }
 
@@ -410,11 +486,13 @@ void vp8_fht_c(short *input, short *output, int pitch,
     switch(tx_type) {
       case ADST_ADST :
       case  DCT_ADST :
-        pth = (tx_dim == 4) ? &adst_4[0] : &adst_8[0];
+        pth = (tx_dim == 4) ? &adst_4[0] :
+                              ((tx_dim == 8) ? &adst_8[0] : &adst_16[0]);
         break;
 
       default :
-        pth = (tx_dim == 4) ? &dct_4[0] : &dct_8[0];
+        pth = (tx_dim == 4) ? &dct_4[0] :
+                              ((tx_dim == 8) ? &dct_8[0] : &dct_16[0]);
         break;
     }
   }
@@ -608,7 +686,7 @@ void vp8_short_walsh8x4_x8_c(short *input, short *output, int pitch) {
 }
 #endif
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 static const double C1 = 0.995184726672197;
 static const double C2 = 0.98078528040323;
 static const double C3 = 0.956940335732209;
diff --git a/vp8/encoder/dct.h b/vp8/encoder/dct.h
index 5e71707cf2..de882b1cf8 100644
--- a/vp8/encoder/dct.h
+++ b/vp8/encoder/dct.h
@@ -26,12 +26,12 @@
 #endif
 
 
-#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM
+#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
 void vp8_fht_c(short *input, short *output, int pitch,
                TX_TYPE tx_type, int tx_dim);
 #endif
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 #ifndef vp8_fdct_short16x16
 #define vp8_fdct_short16x16 vp8_short_fdct16x16_c
 #endif
@@ -81,7 +81,7 @@ extern prototype_fdct(vp8_short_walsh4x4_lossless_c);
 
 typedef prototype_fdct(*vp8_fdct_fn_t);
 typedef struct {
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   vp8_fdct_fn_t    short16x16;
 #endif
   vp8_fdct_fn_t    short8x8;
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index 6ade0aa78a..41b87f2b0b 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -1361,7 +1361,7 @@ static void encode_frame_internal(VP8_COMP *cpi) {
   vp8_zero(cpi->MVcount_hp);
   vp8_zero(cpi->coef_counts);
   vp8_zero(cpi->coef_counts_8x8);
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   vp8_zero(cpi->coef_counts_16x16);
 #endif
 
@@ -1825,7 +1825,7 @@ void vp8cx_encode_intra_macro_block(VP8_COMP *cpi,
   }
 
   /* test code: set transform size based on mode selection */
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   if (mbmi->mode <= TM_PRED) {
     mbmi->txfm_size = TX_16X16;
     cpi->t16x16_count++;
@@ -1922,7 +1922,7 @@ void vp8cx_encode_inter_macroblock (VP8_COMP *cpi, MACROBLOCK *x,
   set_pred_flag(xd, PRED_REF, ref_pred_flag);
 
   /* test code: set transform size based on mode selection */
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   if (mbmi->mode <= TM_PRED || mbmi->mode == NEWMV || mbmi->mode == ZEROMV ||
       mbmi->mode == NEARMV ||  mbmi->mode == NEARESTMV) {
     mbmi->txfm_size = TX_16X16;
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index 053639b9d8..85b8e4f3d3 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -127,6 +127,9 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) {
   BLOCK *b = &x->block[0];
 
   int tx_type = x->e_mbd.mode_info_context->mbmi.txfm_size;
+#if CONFIG_HYBRIDTRANSFORM16X16
+  TX_TYPE txfm_type = x->e_mbd.mode_info_context->bmi[0].as_mode.tx_type;
+#endif
 
 #if CONFIG_COMP_INTRA_PRED
   if (x->e_mbd.mode_info_context->mbmi.second_mode == (MB_PREDICTION_MODE)(DC_PRED - 1))
@@ -139,9 +142,22 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) {
 
   ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, *(b->base_src), x->e_mbd.predictor, b->src_stride);
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   if (tx_type == TX_16X16)
+#if CONFIG_HYBRIDTRANSFORM16X16
+  {
+    if ((x->e_mbd.mode_info_context->mbmi.mode < I8X8_PRED) &&
+        (x->q_index < ACTIVE_HT16)) {
+      BLOCKD  *bd = &x->e_mbd.block[0];
+      txfm_map(bd, pred_mode_conv(x->e_mbd.mode_info_context->mbmi.mode));
+      txfm_type = bd->bmi.as_mode.tx_type;
+      vp8_fht_c(b->src_diff, b->coeff, 32, txfm_type, 16);
+    } else
+      vp8_transform_intra_mby_16x16(x);
+  }
+#else
     vp8_transform_intra_mby_16x16(x);
+#endif
   else
 #endif
   if (tx_type == TX_8X8)
@@ -149,7 +165,7 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) {
   else
     vp8_transform_intra_mby(x);
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   if (tx_type == TX_16X16)
     vp8_quantize_mby_16x16(x);
   else
@@ -160,7 +176,7 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) {
     vp8_quantize_mby(x);
 
   if (x->optimize) {
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     if (tx_type == TX_16X16)
       vp8_optimize_mby_16x16(x, rtcd);
     else
@@ -171,9 +187,20 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) {
       vp8_optimize_mby(x, rtcd);
   }
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   if (tx_type == TX_16X16)
+#if CONFIG_HYBRIDTRANSFORM16X16
+  {
+    if ((x->e_mbd.mode_info_context->mbmi.mode < I8X8_PRED) &&
+        (x->q_index < ACTIVE_HT16)) {
+      BLOCKD *bd = &x->e_mbd.block[0];
+      vp8_ihtllm_c(bd->dqcoeff, bd->diff, 32, txfm_type, 16);
+    } else
+      vp8_inverse_transform_mby_16x16(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
+  }
+#else
     vp8_inverse_transform_mby_16x16(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
+#endif
   else
 #endif
   if (tx_type == TX_8X8)
@@ -219,7 +246,7 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) {
 
 void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) {
   int tx_type = x->e_mbd.mode_info_context->mbmi.txfm_size;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   if (tx_type == TX_16X16) tx_type = TX_8X8; // 16x16 for U and V should default to 8x8 behavior.
 #endif
 #if CONFIG_COMP_INTRA_PRED
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index a26350552f..a66dbe8840 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -300,7 +300,7 @@ void vp8_transform_mby_8x8(MACROBLOCK *x) {
   }
 }
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 void vp8_transform_mbuv_16x16(MACROBLOCK *x) {
   int i;
 
@@ -880,7 +880,7 @@ void vp8_optimize_mbuv_8x8(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) {
 
 
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 void optimize_b_16x16(MACROBLOCK *mb, int i, int type,
                       ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                       const VP8_ENCODER_RTCD *rtcd) {
@@ -1105,7 +1105,7 @@ void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) {
 
   vp8_subtract_mb(rtcd, x);
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   if (tx_type == TX_16X16)
     vp8_transform_mb_16x16(x);
   else
@@ -1115,7 +1115,7 @@ void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) {
   else
     transform_mb(x);
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   if (tx_type == TX_16X16)
     vp8_quantize_mb_16x16(x);
   else
@@ -1126,7 +1126,7 @@ void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) {
     vp8_quantize_mb(x);
 
   if (x->optimize) {
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     if (tx_type == TX_16X16)
       optimize_mb_16x16(x, rtcd);
     else
@@ -1137,7 +1137,7 @@ void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) {
       optimize_mb(x, rtcd);
   }
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   if (tx_type == TX_16X16)
     vp8_inverse_transform_mb_16x16(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
   else
@@ -1214,7 +1214,7 @@ void vp8_encode_inter16x16y(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) {
 
   ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, *(b->base_src), x->e_mbd.predictor, b->src_stride);
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   if (tx_type == TX_16X16)
     vp8_transform_mby_16x16(x);
   else
@@ -1226,7 +1226,7 @@ void vp8_encode_inter16x16y(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) {
 
   vp8_quantize_mby(x);
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   if (tx_type == TX_16X16)
     vp8_inverse_transform_mby_16x16(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
   else
diff --git a/vp8/encoder/encodemb.h b/vp8/encoder/encodemb.h
index 653774aaf1..6a0f223c99 100644
--- a/vp8/encoder/encodemb.h
+++ b/vp8/encoder/encodemb.h
@@ -121,7 +121,7 @@ void vp8_build_dcblock_8x8(MACROBLOCK *b);
 void vp8_optimize_mby_8x8(MACROBLOCK *x, const struct VP8_ENCODER_RTCD *rtcd);
 void vp8_optimize_mbuv_8x8(MACROBLOCK *x, const struct VP8_ENCODER_RTCD *rtcd);
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 void vp8_transform_mb_16x16(MACROBLOCK *mb);
 void vp8_transform_mby_16x16(MACROBLOCK *x);
 void vp8_transform_mbuv_16x16(MACROBLOCK *x);
diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c
index 6a5bf59d50..c4e2617d8e 100644
--- a/vp8/encoder/generic/csystemdependent.c
+++ b/vp8/encoder/generic/csystemdependent.c
@@ -99,7 +99,7 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi) {
   cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_c;
 
   cpi->rtcd.fdct.short8x8                  = vp8_short_fdct8x8_c;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   cpi->rtcd.fdct.short16x16                = vp8_short_fdct16x16_c;
 #endif
   cpi->rtcd.fdct.haar_short2x2             = vp8_short_fhaar2x2_c;
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 85a3c5402a..68c28c3f22 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -1196,14 +1196,14 @@ void vp8_set_speed_features(VP8_COMP *cpi) {
   }
 
   if (cpi->sf.improved_dct) {
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     cpi->mb.vp8_short_fdct16x16 = FDCT_INVOKE(&cpi->rtcd.fdct, short16x16);
 #endif
     cpi->mb.vp8_short_fdct8x8 = FDCT_INVOKE(&cpi->rtcd.fdct, short8x8);
     cpi->mb.vp8_short_fdct8x4 = FDCT_INVOKE(&cpi->rtcd.fdct, short8x4);
     cpi->mb.vp8_short_fdct4x4 = FDCT_INVOKE(&cpi->rtcd.fdct, short4x4);
   } else {
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     cpi->mb.vp8_short_fdct16x16 = FDCT_INVOKE(&cpi->rtcd.fdct, short16x16);
 #endif
     cpi->mb.vp8_short_fdct8x8 = FDCT_INVOKE(&cpi->rtcd.fdct, short8x8);
@@ -1218,7 +1218,7 @@ void vp8_set_speed_features(VP8_COMP *cpi) {
   cpi->mb.quantize_b      = vp8_regular_quantize_b;
   cpi->mb.quantize_b_pair = vp8_regular_quantize_b_pair;
   cpi->mb.quantize_b_8x8  = vp8_regular_quantize_b_8x8;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   cpi->mb.quantize_b_16x16= vp8_regular_quantize_b_16x16;
 #endif
   cpi->mb.quantize_b_2x2  = vp8_regular_quantize_b_2x2;
@@ -3684,7 +3684,7 @@ static void encode_frame_to_data_rate
   update_reference_frames(cm);
   vp8_copy(cpi->common.fc.coef_counts, cpi->coef_counts);
   vp8_copy(cpi->common.fc.coef_counts_8x8, cpi->coef_counts_8x8);
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   vp8_copy(cpi->common.fc.coef_counts_16x16, cpi->coef_counts_16x16);
 #endif
   vp8_adapt_coef_probs(&cpi->common);
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index bff3cdf6ca..04c7ad4edb 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -99,7 +99,7 @@ typedef struct {
       [COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
   vp8_prob coef_probs_8x8[BLOCK_TYPES_8X8]
       [COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   vp8_prob coef_probs_16x16[BLOCK_TYPES_16X16]
       [COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
 #endif
@@ -399,7 +399,7 @@ typedef struct VP8_COMP {
   DECLARE_ALIGNED(64, short, zrun_zbin_boost_y2_8x8[QINDEX_RANGE][64]);
   DECLARE_ALIGNED(64, short, zrun_zbin_boost_uv_8x8[QINDEX_RANGE][64]);
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   DECLARE_ALIGNED(16, short, Y1zbin_16x16[QINDEX_RANGE][256]);
   DECLARE_ALIGNED(16, short, Y2zbin_16x16[QINDEX_RANGE][256]);
   DECLARE_ALIGNED(16, short, UVzbin_16x16[QINDEX_RANGE][256]);
@@ -560,7 +560,7 @@ typedef struct VP8_COMP {
   unsigned int coef_counts_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];  /* for this frame */
   vp8_prob frame_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
   unsigned int frame_branch_ct_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2];
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   unsigned int coef_counts_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];  /* for this frame */
   vp8_prob frame_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
   unsigned int frame_branch_ct_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2];
@@ -623,7 +623,7 @@ typedef struct VP8_COMP {
   int skip_false_count[3];
   int t4x4_count;
   int t8x8_count;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   int t16x16_count;
 #endif
 
diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c
index 81b4d12b38..f55018b76d 100644
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -311,7 +311,7 @@ void vp8_quantize_mbuv_8x8(MACROBLOCK *x) {
 
 
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 void vp8_quantize_mby_16x16(MACROBLOCK *x) {
   int i;
   for (i = 0; i < 16; i++)
@@ -428,7 +428,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi) {
                                           48, 48, 48, 48, 48, 48, 48, 48,
                                           48, 48, 48, 48, 48, 48, 48, 48
                                         };
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   static const int zbin_boost_16x16[256] = {
      0,  0,  0,  8,  8,  8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28,
     30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 48, 48, 48, 48, 48, 48,
@@ -469,7 +469,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi) {
                  cpi->Y1quant_shift[Q] + 0, quant_val);
     cpi->Y1zbin[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
     cpi->Y1zbin_8x8[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     cpi->Y1zbin_16x16[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
 #endif
     cpi->Y1round[Q][0] = (qrounding_factor * quant_val) >> 7;
@@ -477,7 +477,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi) {
     cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
     cpi->zrun_zbin_boost_y1_8x8[Q][0] =
       ((quant_val * zbin_boost_8x8[0]) + 64) >> 7;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     cpi->zrun_zbin_boost_y1_16x16[Q][0] = ((quant_val * zbin_boost_16x16[0]) + 64) >> 7;
 #endif
 
@@ -487,7 +487,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi) {
                  cpi->Y2quant_shift[Q] + 0, quant_val);
     cpi->Y2zbin[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
     cpi->Y2zbin_8x8[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     cpi->Y2zbin_16x16[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
 #endif
     cpi->Y2round[Q][0] = (qrounding_factor * quant_val) >> 7;
@@ -495,7 +495,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi) {
     cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
     cpi->zrun_zbin_boost_y2_8x8[Q][0] =
       ((quant_val * zbin_boost_8x8[0]) + 64) >> 7;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     cpi->zrun_zbin_boost_y2_16x16[Q][0] = ((quant_val * zbin_boost_16x16[0]) + 64) >> 7;
 #endif
 
@@ -504,7 +504,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi) {
                  cpi->UVquant_shift[Q] + 0, quant_val);
     cpi->UVzbin[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
     cpi->UVzbin_8x8[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     cpi->UVzbin_16x16[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
 #endif
     cpi->UVround[Q][0] = (qrounding_factor * quant_val) >> 7;
@@ -512,7 +512,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi) {
     cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;
     cpi->zrun_zbin_boost_uv_8x8[Q][0] =
       ((quant_val * zbin_boost_8x8[0]) + 64) >> 7;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     cpi->zrun_zbin_boost_uv_16x16[Q][0] = ((quant_val * zbin_boost_16x16[0]) + 64) >> 7;
 #endif
 
@@ -570,7 +570,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi) {
         ((quant_val * zbin_boost_8x8[i]) + 64) >> 7;
     }
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     // 16x16 structures. Same comment above applies.
     for (i = 1; i < 256; i++) {
       int rc = vp8_default_zig_zag1d_16x16[i];
@@ -626,14 +626,14 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x) {
     x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];
     x->block[i].zbin = cpi->Y1zbin[QIndex];
     x->block[i].zbin_8x8 = cpi->Y1zbin_8x8[QIndex];
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     x->block[i].zbin_16x16 = cpi->Y1zbin_16x16[QIndex];
 #endif
     x->block[i].round = cpi->Y1round[QIndex];
     x->e_mbd.block[i].dequant = cpi->common.Y1dequant[QIndex];
     x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[QIndex];
     x->block[i].zrun_zbin_boost_8x8 = cpi->zrun_zbin_boost_y1_8x8[QIndex];
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     x->block[i].zrun_zbin_boost_16x16 = cpi->zrun_zbin_boost_y1_16x16[QIndex];
 #endif
     x->block[i].zbin_extra = (short)zbin_extra;
@@ -644,14 +644,14 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x) {
         get_segdata(xd, segment_id, SEG_LVL_EOB);
       x->block[i].eob_max_offset_8x8 =
         get_segdata(xd, segment_id, SEG_LVL_EOB);
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
       x->block[i].eob_max_offset_16x16 =
         get_segdata(xd, segment_id, SEG_LVL_EOB);
 #endif
     } else {
       x->block[i].eob_max_offset = 16;
       x->block[i].eob_max_offset_8x8 = 64;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
       x->block[i].eob_max_offset_16x16 = 256;
 #endif
     }
@@ -668,14 +668,14 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x) {
     x->block[i].quant_shift = cpi->UVquant_shift[QIndex];
     x->block[i].zbin = cpi->UVzbin[QIndex];
     x->block[i].zbin_8x8 = cpi->UVzbin_8x8[QIndex];
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     x->block[i].zbin_16x16 = cpi->UVzbin_16x16[QIndex];
 #endif
     x->block[i].round = cpi->UVround[QIndex];
     x->e_mbd.block[i].dequant = cpi->common.UVdequant[QIndex];
     x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_uv[QIndex];
     x->block[i].zrun_zbin_boost_8x8 = cpi->zrun_zbin_boost_uv_8x8[QIndex];
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     x->block[i].zrun_zbin_boost_16x16 = cpi->zrun_zbin_boost_uv_16x16[QIndex];
 #endif
 
@@ -703,14 +703,14 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x) {
   x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
   x->block[24].zbin = cpi->Y2zbin[QIndex];
   x->block[24].zbin_8x8 = cpi->Y2zbin_8x8[QIndex];
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   x->block[24].zbin_16x16 = cpi->Y2zbin_16x16[QIndex];
 #endif
   x->block[24].round = cpi->Y2round[QIndex];
   x->e_mbd.block[24].dequant = cpi->common.Y2dequant[QIndex];
   x->block[24].zrun_zbin_boost = cpi->zrun_zbin_boost_y2[QIndex];
   x->block[24].zrun_zbin_boost_8x8 = cpi->zrun_zbin_boost_y2_8x8[QIndex];
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   x->block[24].zrun_zbin_boost_16x16 = cpi->zrun_zbin_boost_y2_16x16[QIndex];
 #endif
   x->block[24].zbin_extra = (short)zbin_extra;
diff --git a/vp8/encoder/quantize.h b/vp8/encoder/quantize.h
index 98fed4c111..9bd87fa779 100644
--- a/vp8/encoder/quantize.h
+++ b/vp8/encoder/quantize.h
@@ -46,7 +46,7 @@ extern prototype_quantize_block_pair(vp8_quantize_quantb_pair);
 #endif
 extern prototype_quantize_block(vp8_quantize_quantb_8x8);
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 #ifndef vp8_quantize_quantb_16x16
 #define vp8_quantize_quantb_16x16 vp8_regular_quantize_b_16x16
 #endif
@@ -77,7 +77,7 @@ extern prototype_quantize_mb(vp8_quantize_mby);
 extern prototype_quantize_mb(vp8_quantize_mby_8x8);
 extern prototype_quantize_mb(vp8_quantize_mbuv_8x8);
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 void vp8_quantize_mb_16x16(MACROBLOCK *x);
 extern prototype_quantize_block(vp8_quantize_quantb_16x16);
 extern prototype_quantize_mb(vp8_quantize_mby_16x16);
diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c
index 80fc16ab40..809279e7de 100644
--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c
@@ -175,7 +175,7 @@ void vp8_save_coding_context(VP8_COMP *cpi) {
 #if CONFIG_SWITCHABLE_INTERP
   vp8_copy(cc->switchable_interp_prob, cm->fc.switchable_interp_prob);
 #endif
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   vp8_copy(cc->coef_probs_16x16, cm->fc.coef_probs_16x16);
 #endif
 }
@@ -232,7 +232,7 @@ void vp8_restore_coding_context(VP8_COMP *cpi) {
 #if CONFIG_SWITCHABLE_INTERP
   vp8_copy(cm->fc.switchable_interp_prob, cc->switchable_interp_prob);
 #endif
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   vp8_copy(cm->fc.coef_probs_16x16, cc->coef_probs_16x16);
 #endif
 }
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 2de21e0f35..391254bf26 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -365,7 +365,7 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int QIndex) {
     (const vp8_prob( *)[8][PREV_COEF_CONTEXTS][11]) cpi->common.fc.coef_probs_8x8,
     BLOCK_TYPES_8X8);
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   fill_token_costs(
     cpi->mb.token_costs[TX_16X16],
     (const vp8_prob(*)[8][PREV_COEF_CONTEXTS][11]) cpi->common.fc.coef_probs_16x16,
@@ -615,7 +615,7 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type,
       band = vp8_coef_bands_8x8;
       default_eob = 64;
       break;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     case TX_16X16:
       scan = vp8_default_zig_zag1d_16x16;
       band = vp8_coef_bands_16x16;
@@ -787,7 +787,7 @@ static void macro_block_yrd_8x8(MACROBLOCK *mb,
   *Rate = vp8_rdcost_mby_8x8(mb, 1);
 }
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 static int vp8_rdcost_mby_16x16(MACROBLOCK *mb) {
   int cost;
   MACROBLOCKD *xd = &mb->e_mbd;
@@ -813,8 +813,28 @@ static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion,
     mb->e_mbd.predictor,
     mb->block[0].src_stride);
 
+#if CONFIG_HYBRIDTRANSFORM16X16
+  if ((mb->e_mbd.mode_info_context->mbmi.mode_rdopt < I8X8_PRED) &&
+      (mb->q_index < ACTIVE_HT16)) {
+    BLOCKD *b  = &mb->e_mbd.block[0];
+    BLOCK  *be = &mb->block[0];
+    txfm_map(b, pred_mode_conv(mb->e_mbd.mode_info_context->mbmi.mode_rdopt));
+    vp8_fht_c(be->src_diff, be->coeff, 32, b->bmi.as_mode.tx_type, 16);
+  } else
+    vp8_transform_mby_16x16(mb);
+#else
   vp8_transform_mby_16x16(mb);
+#endif
+
   vp8_quantize_mby_16x16(mb);
+#if CONFIG_HYBRIDTRANSFORM16X16
+  // TODO(jingning) is it possible to quickly determine whether to force
+  //                trailing coefficients to be zero, instead of running trellis
+  //                optimization in the rate-distortion optimization loop?
+  if (mb->e_mbd.mode_info_context->mbmi.mode_rdopt < I8X8_PRED)
+    vp8_optimize_mby_16x16(mb, rtcd);
+#endif
+
   d = ENCODEMB_INVOKE(&rtcd->encodemb, mberr)(mb, 0);
 
   *Distortion = (d >> 2);
@@ -1193,9 +1213,17 @@ static int64_t rd_pick_intra16x16mby_mode(VP8_COMP *cpi,
   int UNINITIALIZED_IS_SAFE(skip);
   MACROBLOCKD *xd = &x->e_mbd;
 
+#if CONFIG_HYBRIDTRANSFORM16X16
+  int best_txtype, rd_txtype;
+#endif
+
   // Y Search for 16x16 intra prediction mode
   for (mode = DC_PRED; mode <= TM_PRED; mode++) {
     mbmi->mode = mode;
+#if CONFIG_HYBRIDTRANSFORM16X16
+    mbmi->mode_rdopt = mode;
+#endif
+
 #if CONFIG_COMP_INTRA_PRED
     for (mode2 = DC_PRED - 1; mode2 != TM_PRED + 1; mode2++) {
       mbmi->second_mode = mode2;
@@ -1211,7 +1239,7 @@ static int64_t rd_pick_intra16x16mby_mode(VP8_COMP *cpi,
       }
 #endif
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
       macro_block_yrd_16x16(x, &ratey, &distortion, IF_RTCD(&cpi->rtcd));
 #else
       macro_block_yrd_8x8(x, &ratey, &distortion, IF_RTCD(&cpi->rtcd));
@@ -1222,6 +1250,10 @@ static int64_t rd_pick_intra16x16mby_mode(VP8_COMP *cpi,
 
       this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
 
+#if CONFIG_HYBRIDTRANSFORM16X16
+      rd_txtype = x->e_mbd.block[0].bmi.as_mode.tx_type;
+#endif
+
       if (this_rd < best_rd) {
 #if CONFIG_TX16X16
         skip = mby_is_skippable_16x16(xd);
@@ -1236,6 +1268,9 @@ static int64_t rd_pick_intra16x16mby_mode(VP8_COMP *cpi,
         *Rate = rate;
         *rate_y = ratey;
         *Distortion = distortion;
+#if CONFIG_HYBRIDTRANSFORM16X16
+        best_txtype = rd_txtype;
+#endif
       }
 #if CONFIG_COMP_INTRA_PRED
     }
@@ -1244,6 +1279,10 @@ static int64_t rd_pick_intra16x16mby_mode(VP8_COMP *cpi,
 
   *skippable = skip;
   mbmi->mode = mode_selected;
+#if CONFIG_HYBRIDTRANSFORM16X16
+  x->e_mbd.block[0].bmi.as_mode.tx_type = best_txtype;
+#endif
+
 #if CONFIG_COMP_INTRA_PRED
   mbmi->second_mode = mode2_selected;
 #endif
@@ -2871,7 +2910,7 @@ static void inter_mode_cost(VP8_COMP *cpi, MACROBLOCK *x, int this_mode,
                             int *rate2, int *distortion2, int *rate_y,
                             int *distortion, int* rate_uv, int *distortion_uv) {
   // Y cost and distortion
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   if (this_mode == ZEROMV ||
       this_mode == NEARESTMV ||
       this_mode == NEARMV ||
@@ -2883,7 +2922,7 @@ static void inter_mode_cost(VP8_COMP *cpi, MACROBLOCK *x, int this_mode,
       macro_block_yrd_8x8(x, rate_y, distortion, IF_RTCD(&cpi->rtcd));
     else
       macro_block_yrd(x, rate_y, distortion, IF_RTCD(&cpi->rtcd));
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   }
 #endif
 
@@ -2892,7 +2931,7 @@ static void inter_mode_cost(VP8_COMP *cpi, MACROBLOCK *x, int this_mode,
 
   // UV cost and distortion
   if (cpi->common.txfm_mode == ALLOW_8X8
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
       || this_mode == ZEROMV ||
       this_mode == NEARESTMV ||
       this_mode == NEARMV ||
@@ -3020,6 +3059,10 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
   unsigned int ref_costs[MAX_REF_FRAMES];
   int_mv seg_mvs[BLOCK_MAX_SEGMENTS - 1][16 /* n_blocks */][MAX_REF_FRAMES - 1];
 
+#if CONFIG_HYBRIDTRANSFORM16X16
+  int best_txtype, rd_txtype;
+#endif
+
   vpx_memset(mode8x8, 0, sizeof(mode8x8));
   vpx_memset(&frame_mv, 0, sizeof(frame_mv));
   vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
@@ -3245,10 +3288,14 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
           // FIXME compound intra prediction
           RECON_INVOKE(&cpi->common.rtcd.recon, build_intra_predictors_mby)
               (&x->e_mbd);
-#if CONFIG_TX16X16
+
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
           // FIXME: breaks lossless since 4x4 isn't allowed
           macro_block_yrd_16x16(x, &rate_y, &distortion,
                                 IF_RTCD(&cpi->rtcd));
+#if CONFIG_HYBRIDTRANSFORM16X16
+          rd_txtype = x->e_mbd.block[0].bmi.as_mode.tx_type;
+#endif
           rate2 += rate_y;
           distortion2 += distortion;
           rate2 += x->mbmode_cost[x->e_mbd.frame_type][mbmi->mode];
@@ -3509,6 +3556,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
           if (flag)
             continue;
         case ZEROMV:
+
         default:
           break;
       }
@@ -3626,6 +3674,15 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
                       && this_mode != B_PRED
                       && this_mode != I8X8_PRED);
 
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
+        if (this_mode <= TM_PRED ||
+            this_mode == NEWMV ||
+            this_mode == ZEROMV ||
+            this_mode == NEARESTMV ||
+            this_mode == NEARMV)
+          mb_skippable = mb_is_skippable_16x16(&x->e_mbd);
+        else
+#endif
         if ((cpi->common.txfm_mode == ALLOW_8X8) && has_y2) {
           if (mbmi->ref_frame != INTRA_FRAME) {
 #if CONFIG_TX16X16
@@ -3718,6 +3775,10 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
           // Note index of best mode so far
           best_mode_index = mode_index;
 
+#if CONFIG_HYBRIDTRANSFORM16X16
+          best_txtype = rd_txtype;
+#endif
+
           if (this_mode <= B_PRED) {
             if (cpi->common.txfm_mode == ALLOW_8X8
                 && this_mode != B_PRED
@@ -3869,6 +3930,11 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
     }
   }
 
+#if CONFIG_HYBRIDTRANSFORM16X16
+  if (best_mbmode.mode < I8X8_PRED)
+    xd->mode_info_context->bmi[0].as_mode.tx_type = best_txtype;
+#endif
+
   if (best_mbmode.mode == I8X8_PRED)
     set_i8x8_block_modes(x, mode8x8);
 
@@ -3956,6 +4022,10 @@ void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x,
   int modeuv, modeuv8x8, uv_intra_skippable, uv_intra_skippable_8x8;
   int y_intra16x16_skippable;
 
+#if CONFIG_HYBRIDTRANSFORM16X16
+  int best_txtype;
+#endif
+
   mbmi->ref_frame = INTRA_FRAME;
   rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv,
                           &uv_intra_skippable);
@@ -3981,8 +4051,12 @@ void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x,
                                           &rate16x16_tokenonly, &dist16x16,
                                           &y_intra16x16_skippable);
   mode16x16 = mbmi->mode;
+#if CONFIG_HYBRIDTRANSFORM16X16
+  best_txtype = xd->block[0].bmi.as_mode.tx_type;
+  xd->mode_info_context->bmi[0].as_mode.tx_type = best_txtype;
+#endif
 
-#if CONFIG_HYBRIDTRANSFORM
+#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8
   mbmi->mode_rdopt = I8X8_PRED;
 #endif
 
@@ -4041,6 +4115,10 @@ void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x,
       mbmi->mode = mode16x16;
       rate = rate16x16 + rateuv8x8;
       dist = dist16x16 + (distuv8x8 >> 2);
+#if CONFIG_HYBRIDTRANSFORM16X16
+      // save this into supermacroblock coding decision buffer
+      xd->mode_info_context->bmi[0].as_mode.tx_type = best_txtype;
+#endif
     }
     if (cpi->common.mb_no_coeff_skip)
       rate += vp8_cost_bit(get_pred_prob(cm, xd, PRED_MBSKIP), 0);
diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c
index 6a6afeac1d..d0c3152836 100644
--- a/vp8/encoder/tokenize.c
+++ b/vp8/encoder/tokenize.c
@@ -26,14 +26,14 @@
 #ifdef ENTROPY_STATS
 INT64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
 INT64 context_counters_8x8[BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 INT64 context_counters_16x16[BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
 #endif
 extern unsigned int tree_update_hist[BLOCK_TYPES][COEF_BANDS]
                     [PREV_COEF_CONTEXTS][ENTROPY_NODES][2];
 extern unsigned int tree_update_hist_8x8[BLOCK_TYPES_8X8][COEF_BANDS]
                     [PREV_COEF_CONTEXTS][ENTROPY_NODES] [2];
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 extern unsigned int tree_update_hist_16x16[BLOCK_TYPES_16X16][COEF_BANDS]
                     [PREV_COEF_CONTEXTS][ENTROPY_NODES] [2];
 #endif
@@ -42,7 +42,7 @@ void vp8_stuff_mb(VP8_COMP *cpi,
                   MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run);
 void vp8_stuff_mb_8x8(VP8_COMP *cpi,
                       MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run);
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 void vp8_stuff_mb_16x16(VP8_COMP *cpi, MACROBLOCKD *xd,
                         TOKENEXTRA **t, int dry_run);
 #endif
@@ -112,7 +112,7 @@ static void fill_value_tokens() {
   vp8_dct_value_cost_ptr   = dct_value_cost + DCT_MAX_VALUE;
 }
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 static void tokenize1st_order_b_16x16(MACROBLOCKD *xd,
                                       const BLOCKD *const b,
                                       TOKENEXTRA **tp,
@@ -764,7 +764,7 @@ int mb_is_skippable_8x8(MACROBLOCKD *xd) {
   return (mby_is_skippable_8x8(xd) & mbuv_is_skippable_8x8(xd));
 }
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 int mby_is_skippable_16x16(MACROBLOCKD *xd) {
   int skip = 1;
   //skip &= (xd->block[0].eob < 2); // I think this should be commented? No second order == DC must be coded
@@ -811,12 +811,12 @@ void vp8_tokenize_mb(VP8_COMP *cpi,
   has_y2_block = (xd->mode_info_context->mbmi.mode != B_PRED
                   && xd->mode_info_context->mbmi.mode != I8X8_PRED
                   && xd->mode_info_context->mbmi.mode != SPLITMV);
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   if (tx_type == TX_16X16) has_y2_block = 0; // Because of inter frames
 #endif
 
   switch (tx_type) {
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     case TX_16X16:
       xd->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable_16x16(xd);
       break;
@@ -833,7 +833,7 @@ void vp8_tokenize_mb(VP8_COMP *cpi,
     if (!dry_run)
       cpi->skip_true_count[mb_skip_context] += skip_inc;
     if (!cpi->common.mb_no_coeff_skip) {
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
       if (tx_type == TX_16X16)
         vp8_stuff_mb_16x16(cpi, xd, t, dry_run);
       else
@@ -869,12 +869,14 @@ void vp8_tokenize_mb(VP8_COMP *cpi,
     plane_type = 0;
   }
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   if (tx_type == TX_16X16) {
     ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)xd->above_context;
     ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)xd->left_context;
+
     tokenize1st_order_b_16x16(xd, xd->block, t, 3,
                               xd->frame_type, A, L, cpi, dry_run);
+
     for (b = 1; b < 16; b++) {
       *(A + vp8_block2above[b]) = *(A);
       *(L + vp8_block2left[b] ) = *(L);
@@ -955,13 +957,13 @@ void init_context_counters(void) {
   if (!f) {
     vpx_memset(context_counters, 0, sizeof(context_counters));
     vpx_memset(context_counters_8x8, 0, sizeof(context_counters_8x8));
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     vpx_memset(context_counters_16x16, 0, sizeof(context_counters_16x16));
 #endif
   } else {
     fread(context_counters, sizeof(context_counters), 1, f);
     fread(context_counters_8x8, sizeof(context_counters_8x8), 1, f);
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     fread(context_counters_16x16, sizeof(context_counters_16x16), 1, f);
 #endif
     fclose(f);
@@ -971,13 +973,13 @@ void init_context_counters(void) {
   if (!f) {
     vpx_memset(tree_update_hist, 0, sizeof(tree_update_hist));
     vpx_memset(tree_update_hist_8x8, 0, sizeof(tree_update_hist_8x8));
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     vpx_memset(tree_update_hist_16x16, 0, sizeof(tree_update_hist_16x16));
 #endif
   } else {
     fread(tree_update_hist, sizeof(tree_update_hist), 1, f);
     fread(tree_update_hist_8x8, sizeof(tree_update_hist_8x8), 1, f);
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     fread(tree_update_hist_16x16, sizeof(tree_update_hist_16x16), 1, f);
 #endif
     fclose(f);
@@ -1055,7 +1057,7 @@ void print_context_counters() {
   } while (++type < BLOCK_TYPES_8X8);
   fprintf(f, "\n};\n");
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   fprintf(f, "static const unsigned int\nvp8_default_coef_counts_16x16"
           "[BLOCK_TYPES_16X16] [COEF_BANDS]"
           "[PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS] = {");
@@ -1158,7 +1160,7 @@ void print_context_counters() {
   } while (++type < BLOCK_TYPES_8X8);
   fprintf(f, "\n};\n");
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   fprintf(f, "static const vp8_prob\n"
           "vp8_default_coef_probs_16x16[BLOCK_TYPES_16X16] [COEF_BANDS]\n"
           "[PREV_COEF_CONTEXTS] [ENTROPY_NODES] = {");
@@ -1198,7 +1200,7 @@ void print_context_counters() {
   f = fopen("context.bin", "wb");
   fwrite(context_counters, sizeof(context_counters), 1, f);
   fwrite(context_counters_8x8, sizeof(context_counters_8x8), 1, f);
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   fwrite(context_counters_16x16, sizeof(context_counters_16x16), 1, f);
 #endif
   fclose(f);
@@ -1340,7 +1342,7 @@ void vp8_stuff_mb_8x8(VP8_COMP *cpi,
 }
 
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 static __inline
 void stuff1st_order_b_16x16(const BLOCKD *const b,
                             TOKENEXTRA **tp,
@@ -1496,7 +1498,7 @@ void vp8_fix_contexts(MACROBLOCKD *xd) {
   if ((xd->mode_info_context->mbmi.mode != B_PRED
       && xd->mode_info_context->mbmi.mode != I8X8_PRED
       && xd->mode_info_context->mbmi.mode != SPLITMV)
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
       || xd->mode_info_context->mbmi.txfm_size == TX_16X16
 #endif
       ) {
diff --git a/vp8/encoder/tokenize.h b/vp8/encoder/tokenize.h
index af34337ff7..f043e64cfd 100644
--- a/vp8/encoder/tokenize.h
+++ b/vp8/encoder/tokenize.h
@@ -48,7 +48,7 @@ extern INT64 context_counters[BLOCK_TYPES][COEF_BANDS]
                              [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS];
 extern INT64 context_counters_8x8[BLOCK_TYPES_8X8][COEF_BANDS]
                                  [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS];
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 extern INT64 context_counters_16x16[BLOCK_TYPES_16X16][COEF_BANDS]
                                    [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS];
 #endif
-- 
GitLab