diff --git a/vp9/common/vp9_idct.c b/vp9/common/vp9_idct.c
index 10b83f58b1564cc917d6d3a61dc4142797abbaaf..99d84c9cad16e689f77758bda0881cccdcdd21ee 100644
--- a/vp9/common/vp9_idct.c
+++ b/vp9/common/vp9_idct.c
@@ -1284,3 +1284,93 @@ void vp9_short_idct32x32_1_add_c(int16_t *input, uint8_t *dest,
     dest += dest_stride;
   }
 }
+
+// idct
+void vp9_idct_add(int16_t *input, uint8_t *dest, int stride, int eob) {
+  if (eob <= 1)  // any eob <= 1, including 0, is routed to the _1 variant
+    vp9_short_idct4x4_1_add(input, dest, stride);
+  else
+    vp9_short_idct4x4_add(input, dest, stride);
+}
+
+// Lossless 4x4 counterpart of vp9_idct_add: routes to the iwalsh4x4 routines.
+void vp9_idct_add_lossless(int16_t *input, uint8_t *dest, int stride,
+                           int eob) {
+  if (eob > 1)
+    vp9_short_iwalsh4x4_add(input, dest, stride);
+  else  // NOTE(review): _c variant is called directly here - confirm intended
+    vp9_short_iwalsh4x4_1_add_c(input, dest, stride);
+}
+
+void vp9_idct_add_8x8(int16_t *input, uint8_t *dest, int stride, int eob) {
+  // Selects which 8x8 inverse DCT + add routine to call, based on eob.
+  // The calculation can be simplified when few dct coefficients are non-zero,
+  // so small eob values are sent to the reduced _1 / _10 variants.
+  // TODO(yunqingwang): the "eob == 1" case is also handled in
+  // vp9_short_idct8x8_c. Combine that with the code here.
+  if (eob == 0)
+    return;  // no routine is called when eob is zero
+
+  if (eob == 1) {
+    // DC only DCT coefficient
+    vp9_short_idct8x8_1_add(input, dest, stride);
+  } else if (eob <= 10) {
+    vp9_short_idct8x8_10_add(input, dest, stride);
+  } else {
+    vp9_short_idct8x8_add(input, dest, stride);
+  }
+}
+
+void vp9_idct_add_16x16(int16_t *input, uint8_t *dest, int stride, int eob) {
+  /* Fewer non-zero dct coefficients allow a simpler calculation, so the
+   * routine is picked from eob; eob == 0 makes no call at all. */
+  if (eob == 0)
+    return;
+  if (eob == 1)
+    /* DC only DCT coefficient. */
+    vp9_short_idct16x16_1_add(input, dest, stride);
+  else if (eob <= 10)
+    vp9_short_idct16x16_10_add(input, dest, stride);
+  else
+    vp9_short_idct16x16_add(input, dest, stride);
+}
+
+void vp9_idct_add_32x32(int16_t *input, uint8_t *dest, int stride, int eob) {
+  if (eob == 0)
+    return;  // eob == 0: neither 32x32 routine is called
+  if (eob == 1)
+    vp9_short_idct32x32_1_add(input, dest, stride);
+  else
+    vp9_short_idct32x32_add(input, dest, stride);
+}
+
+// iht
+void vp9_iht_add(TX_TYPE tx_type, int16_t *input, uint8_t *dest, int stride,
+                 int eob) {
+  if (tx_type != DCT_DCT)  // eob is only consulted on the DCT_DCT path
+    vp9_short_iht4x4_add(input, dest, stride, tx_type);
+  else
+    vp9_idct_add(input, dest, stride, eob);
+}
+
+// 8x8 inverse transform + add: DCT_DCT defers to vp9_idct_add_8x8, any other
+// tx_type runs vp9_short_iht8x8_add when eob > 0.
+void vp9_iht_add_8x8(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
+                     int stride, int eob) {
+  if (tx_type == DCT_DCT) {
+    vp9_idct_add_8x8(input, dest, stride, eob);
+  } else if (eob > 0) {
+    vp9_short_iht8x8_add(input, dest, stride, tx_type);
+  }
+}
+
+// 16x16 inverse transform + add: DCT_DCT defers to vp9_idct_add_16x16, any
+// other tx_type runs vp9_short_iht16x16_add when eob > 0.
+void vp9_iht_add_16x16(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
+                       int stride, int eob) {
+  if (tx_type == DCT_DCT) {
+    vp9_idct_add_16x16(input, dest, stride, eob);
+  } else if (eob > 0) {
+    vp9_short_iht16x16_add(input, dest, stride, tx_type);
+  }
+}
diff --git a/vp9/common/vp9_idct.h b/vp9/common/vp9_idct.h
index 59892cd03ec989523802b5df7dd833fa28d05107..0ef905cc8e9501a2c13dfd1378cca4548f3ee376 100644
--- a/vp9/common/vp9_idct.h
+++ b/vp9/common/vp9_idct.h
@@ -16,6 +16,7 @@
 #include "./vpx_config.h"
 #include "vpx/vpx_integer.h"
 #include "vp9/common/vp9_common.h"
+#include "vp9/common/vp9_enums.h"
 
 
 // Constants and Macros used by all idct/dct functions
@@ -86,4 +87,22 @@ typedef struct {
   transform_1d cols, rows;  // vertical and horizontal
 } transform_2d;
 
+// Inverse transform + add wrappers that select a routine based on eob.
+void vp9_idct_add(int16_t *input, uint8_t *dest, int stride, int eob);
+void vp9_idct_add_lossless(int16_t *input, uint8_t *dest,
+                           int stride, int eob);
+void vp9_idct_add_8x8(int16_t *input, uint8_t *dest, int stride, int eob);
+void vp9_idct_add_16x16(int16_t *input, uint8_t *dest, int stride, int eob);
+void vp9_idct_add_32x32(int16_t *input, uint8_t *dest, int stride, int eob);
+
+// tx_type-aware wrappers; DCT_DCT defers to the matching function above.
+void vp9_iht_add(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
+                 int stride, int eob);
+
+void vp9_iht_add_8x8(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
+                     int stride, int eob);
+
+void vp9_iht_add_16x16(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
+                       int stride, int eob);
+
 #endif  // VP9_COMMON_VP9_IDCT_H_
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 225305b19af357114e276313b24013d17b5e2f41..64c80ede989de55556509ae038004e8dccf97528 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -27,22 +27,6 @@ forward_decls vp9_common_forward_decls
 # this variable is for functions that are 64 bit only.
 [ $arch = "x86_64" ] && mmx_x86_64=mmx && sse2_x86_64=sse2 && ssse3_x86_64=ssse3
 
-#
-# Dequant
-#
-
-prototype void vp9_idct_add_16x16 "int16_t *input, uint8_t *dest, int stride, int eob"
-specialize vp9_idct_add_16x16
-
-prototype void vp9_idct_add_8x8 "int16_t *input, uint8_t *dest, int stride, int eob"
-specialize vp9_idct_add_8x8
-
-prototype void vp9_idct_add "int16_t *input, uint8_t *dest, int stride, int eob"
-specialize vp9_idct_add
-
-prototype void vp9_idct_add_32x32 "int16_t *q, uint8_t *dst, int stride, int eob"
-specialize vp9_idct_add_32x32
-
 #
 # RECON
 #
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index 018f94954d72feef9ef35b32dfa0dc3fdb01ccc6..785beb7113e0ae473cda97fdb8d0d014e2d4a6cd 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -19,6 +19,7 @@
 #include "vp9/common/vp9_entropy.h"
 #include "vp9/common/vp9_entropymode.h"
 #include "vp9/common/vp9_extend.h"
+#include "vp9/common/vp9_idct.h"
 #include "vp9/common/vp9_pred_common.h"
 #include "vp9/common/vp9_quant_common.h"
 #include "vp9/common/vp9_reconintra.h"
@@ -31,7 +32,6 @@
 #include "vp9/decoder/vp9_detokenize.h"
 #include "vp9/decoder/vp9_decodemv.h"
 #include "vp9/decoder/vp9_dsubexp.h"
-#include "vp9/decoder/vp9_idct_blk.h"
 #include "vp9/decoder/vp9_onyxd_int.h"
 #include "vp9/decoder/vp9_read_bit_buffer.h"
 #include "vp9/decoder/vp9_thread.h"
@@ -104,15 +104,15 @@ static void decode_block(int plane, int block, BLOCK_SIZE plane_bsize,
         if (tx_type == DCT_DCT)
           xd->itxm_add(qcoeff, dst, stride, eob);
         else
-          vp9_iht_add_c(tx_type, qcoeff, dst, stride, eob);
+          vp9_iht_add(tx_type, qcoeff, dst, stride, eob);
         break;
       case TX_8X8:
         tx_type = get_tx_type_8x8(pd->plane_type, xd);
-        vp9_iht_add_8x8_c(tx_type, qcoeff, dst, stride, eob);
+        vp9_iht_add_8x8(tx_type, qcoeff, dst, stride, eob);
         break;
       case TX_16X16:
         tx_type = get_tx_type_16x16(pd->plane_type, xd);
-        vp9_iht_add_16x16_c(tx_type, qcoeff, dst, stride, eob);
+        vp9_iht_add_16x16(tx_type, qcoeff, dst, stride, eob);
         break;
       case TX_32X32:
         tx_type = DCT_DCT;
@@ -495,7 +495,7 @@ static void setup_quantization(VP9D_COMP *pbi, struct vp9_read_bit_buffer *rb) {
                  cm->uv_dc_delta_q == 0 &&
                  cm->uv_ac_delta_q == 0;
 
-  xd->itxm_add = xd->lossless ? vp9_idct_add_lossless_c
+  xd->itxm_add = xd->lossless ? vp9_idct_add_lossless
                               : vp9_idct_add;
 }
 
diff --git a/vp9/decoder/vp9_idct_blk.c b/vp9/decoder/vp9_idct_blk.c
deleted file mode 100644
index a0af000b0f5cfa5d44d5546dc41d66c36d8dfdd7..0000000000000000000000000000000000000000
--- a/vp9/decoder/vp9_idct_blk.c
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "./vp9_rtcd.h"
-#include "vp9/common/vp9_blockd.h"
-#include "vp9/decoder/vp9_idct_blk.h"
-
-void vp9_iht_add_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest, int stride,
-                   int eob) {
-  if (tx_type == DCT_DCT)
-    vp9_idct_add(input, dest, stride, eob);
-  else
-    vp9_short_iht4x4_add(input, dest, stride, tx_type);
-}
-
-void vp9_iht_add_8x8_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
-                       int stride, int eob) {
-  if (tx_type == DCT_DCT) {
-    vp9_idct_add_8x8(input, dest, stride, eob);
-  } else {
-    if (eob > 0) {
-      vp9_short_iht8x8_add(input, dest, stride, tx_type);
-    }
-  }
-}
-
-void vp9_idct_add_c(int16_t *input, uint8_t *dest, int stride, int eob) {
-  if (eob > 1)
-    vp9_short_idct4x4_add(input, dest, stride);
-  else
-    vp9_short_idct4x4_1_add(input, dest, stride);
-}
-
-void vp9_idct_add_lossless_c(int16_t *input, uint8_t *dest, int stride,
-                             int eob) {
-  if (eob > 1)
-    vp9_short_iwalsh4x4_add(input, dest, stride);
-  else
-    vp9_short_iwalsh4x4_1_add_c(input, dest, stride);
-}
-
-void vp9_idct_add_8x8_c(int16_t *input, uint8_t *dest, int stride, int eob) {
-  // If dc is 1, then input[0] is the reconstructed value, do not need
-  // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.
-
-  // The calculation can be simplified if there are not many non-zero dct
-  // coefficients. Use eobs to decide what to do.
-  // TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c.
-  // Combine that with code here.
-  if (eob) {
-    if (eob == 1)
-      // DC only DCT coefficient
-      vp9_short_idct8x8_1_add(input, dest, stride);
-    else if (eob <= 10)
-      vp9_short_idct8x8_10_add(input, dest, stride);
-    else
-      vp9_short_idct8x8_add(input, dest, stride);
-  }
-}
-
-void vp9_iht_add_16x16_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
-                         int stride, int eob) {
-  if (tx_type == DCT_DCT) {
-    vp9_idct_add_16x16(input, dest, stride, eob);
-  } else {
-    if (eob > 0) {
-      vp9_short_iht16x16_add(input, dest, stride, tx_type);
-    }
-  }
-}
-
-void vp9_idct_add_16x16_c(int16_t *input, uint8_t *dest, int stride, int eob) {
-  /* The calculation can be simplified if there are not many non-zero dct
-   * coefficients. Use eobs to separate different cases. */
-  if (eob) {
-    if (eob == 1)
-      /* DC only DCT coefficient. */
-      vp9_short_idct16x16_1_add(input, dest, stride);
-    else if (eob <= 10)
-      vp9_short_idct16x16_10_add(input, dest, stride);
-    else
-      vp9_short_idct16x16_add(input, dest, stride);
-  }
-}
-
-void vp9_idct_add_32x32_c(int16_t *input, uint8_t *dest, int stride, int eob) {
-  if (eob) {
-    if (eob == 1)
-      vp9_short_idct32x32_1_add(input, dest, stride);
-    else
-      vp9_short_idct32x32_add(input, dest, stride);
-  }
-}
-
diff --git a/vp9/decoder/vp9_idct_blk.h b/vp9/decoder/vp9_idct_blk.h
deleted file mode 100644
index 00f1bc6a60047e7306a7b36f918861c768606269..0000000000000000000000000000000000000000
--- a/vp9/decoder/vp9_idct_blk.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef VP9_DECODER_VP9_IDCT_BLK_H_
-#define VP9_DECODER_VP9_IDCT_BLK_H_
-
-#include "vp9/common/vp9_blockd.h"
-
-void vp9_idct_add_lossless_c(int16_t *input, uint8_t *dest,
-                             int stride, int eob);
-
-void vp9_iht_add_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
-                   int stride, int eob);
-
-void vp9_iht_add_8x8_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
-                       int stride, int eob);
-
-void vp9_iht_add_16x16_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
-                         int stride, int eob);
-
-#endif  // VP9_DECODER_VP9_IDCT_BLK_H_
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 76a5d33e7b89f777f9d8a32ca32170d3da4f434c..a610d634083704003f3cf549a8252017cc4e1141 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -8,16 +8,21 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
+
+#include "./vp9_rtcd.h"
 #include "./vpx_config.h"
-#include "vp9/encoder/vp9_encodemb.h"
+
+#include "vpx_mem/vpx_mem.h"
+
+#include "vp9/common/vp9_idct.h"
 #include "vp9/common/vp9_reconinter.h"
-#include "vp9/encoder/vp9_quantize.h"
-#include "vp9/encoder/vp9_tokenize.h"
 #include "vp9/common/vp9_reconintra.h"
-#include "vpx_mem/vpx_mem.h"
-#include "vp9/encoder/vp9_rdopt.h"
 #include "vp9/common/vp9_systemdependent.h"
-#include "vp9_rtcd.h"
+
+#include "vp9/encoder/vp9_encodemb.h"
+#include "vp9/encoder/vp9_quantize.h"
+#include "vp9/encoder/vp9_rdopt.h"
+#include "vp9/encoder/vp9_tokenize.h"
 
 DECLARE_ALIGNED(16, extern const uint8_t,
                 vp9_pt_energy_class[MAX_ENTROPY_TOKENS]);
@@ -47,28 +52,6 @@ static void inverse_transform_b_4x4_add(MACROBLOCKD *xd, int eob,
     xd->inv_txm4x4_add(dqcoeff, dest, stride);
 }
 
-static void inverse_transform_b_8x8_add(int eob,
-                                        int16_t *dqcoeff, uint8_t *dest,
-                                        int stride) {
-  if (eob <= 1)
-    vp9_short_idct8x8_1_add(dqcoeff, dest, stride);
-  else if (eob <= 10)
-    vp9_short_idct8x8_10_add(dqcoeff, dest, stride);
-  else
-    vp9_short_idct8x8_add(dqcoeff, dest, stride);
-}
-
-static void inverse_transform_b_16x16_add(int eob,
-                                          int16_t *dqcoeff, uint8_t *dest,
-                                          int stride) {
-  if (eob <= 1)
-    vp9_short_idct16x16_1_add(dqcoeff, dest, stride);
-  else if (eob <= 10)
-    vp9_short_idct16x16_10_add(dqcoeff, dest, stride);
-  else
-    vp9_short_idct16x16_add(dqcoeff, dest, stride);
-}
-
 static void subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
   struct macroblock_plane *const p = &x->plane[plane];
   const MACROBLOCKD *const xd = &x->e_mbd;
@@ -476,12 +459,10 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
       vp9_short_idct32x32_add(dqcoeff, dst, pd->dst.stride);
       break;
     case TX_16X16:
-      inverse_transform_b_16x16_add(pd->eobs[block], dqcoeff, dst,
-                                    pd->dst.stride);
+      vp9_idct_add_16x16(dqcoeff, dst, pd->dst.stride, pd->eobs[block]);
       break;
     case TX_8X8:
-      inverse_transform_b_8x8_add(pd->eobs[block], dqcoeff, dst,
-                                  pd->dst.stride);
+      vp9_idct_add_8x8(dqcoeff, dst, pd->dst.stride, pd->eobs[block]);
       break;
     case TX_4X4:
       // this is like vp9_short_idct4x4 but has a special case around eob<=1
@@ -597,12 +578,8 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
       vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
                      p->quant, p->quant_shift, qcoeff, dqcoeff,
                      pd->dequant, p->zbin_extra, eob, scan, iscan);
-      if (!x->skip_encode && *eob) {
-        if (tx_type == DCT_DCT)
-          inverse_transform_b_16x16_add(*eob, dqcoeff, dst, pd->dst.stride);
-        else
-          vp9_short_iht16x16_add(dqcoeff, dst, pd->dst.stride, tx_type);
-      }
+      if (!x->skip_encode && *eob)
+        vp9_iht_add_16x16(tx_type, dqcoeff, dst, pd->dst.stride, *eob);
       break;
     case TX_8X8:
       tx_type = get_tx_type_8x8(pd->plane_type, xd);
@@ -626,12 +603,8 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
       vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
                      p->quant_shift, qcoeff, dqcoeff,
                      pd->dequant, p->zbin_extra, eob, scan, iscan);
-      if (!x->skip_encode && *eob) {
-        if (tx_type == DCT_DCT)
-          inverse_transform_b_8x8_add(*eob, dqcoeff, dst, pd->dst.stride);
-        else
-          vp9_short_iht8x8_add(dqcoeff, dst, pd->dst.stride, tx_type);
-      }
+      if (!x->skip_encode && *eob)
+        vp9_iht_add_8x8(tx_type, dqcoeff, dst, pd->dst.stride, *eob);
       break;
     case TX_4X4:
       tx_type = get_tx_type_4x4(pd->plane_type, xd, block);
diff --git a/vp9/vp9dx.mk b/vp9/vp9dx.mk
index 54c9706a53d86d7cf3e7e1a73bbe77c43b64cd4a..3a27cdd04c3b76495a4e93d766e90c5c0bdd058d 100644
--- a/vp9/vp9dx.mk
+++ b/vp9/vp9dx.mk
@@ -32,8 +32,6 @@ VP9_DX_SRCS-yes += decoder/vp9_thread.c
 VP9_DX_SRCS-yes += decoder/vp9_thread.h
 VP9_DX_SRCS-yes += decoder/vp9_treereader.h
 VP9_DX_SRCS-yes += decoder/vp9_onyxd_if.c
-VP9_DX_SRCS-yes += decoder/vp9_idct_blk.c
-VP9_DX_SRCS-yes += decoder/vp9_idct_blk.h
 VP9_DX_SRCS-yes += decoder/vp9_dsubexp.c
 VP9_DX_SRCS-yes += decoder/vp9_dsubexp.h