diff --git a/vp8/common/rtcd_defs.sh b/vp8/common/rtcd_defs.sh
index be3620bef78fdd7d0ef6d62013e41c53a12fd407..c622562d57f972a17e677063658fc656f8ff0fe2 100644
--- a/vp8/common/rtcd_defs.sh
+++ b/vp8/common/rtcd_defs.sh
@@ -448,6 +448,9 @@ if [ "$CONFIG_INTERNAL_STATS" = "yes" ]; then
 fi
 
 # fdct functions
+prototype void vp9_fht "const short *input, int pitch, short *output, int tx_type, int tx_dim"
+specialize vp9_fht
+
 prototype void vp9_short_fdct8x8 "short *InputData, short *OutputData, int pitch"
 specialize vp9_short_fdct8x8
 
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index 1757f24672e90ee9f3e23ce7dfabc6094eb95906..bd8539e4f45358eb44e96c1393d34d6c000e5357 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -73,7 +73,7 @@ void vp9_encode_intra4x4block(const VP8_ENCODER_RTCD *rtcd,
 
   tx_type = get_tx_type(&x->e_mbd, b);
   if (tx_type != DCT_DCT) {
-    vp9_fht_c(be->src_diff, 32, be->coeff, tx_type, 4);
+    vp9_fht(be->src_diff, 32, be->coeff, tx_type, 4);
     vp9_ht_quantize_b_4x4(be, b, tx_type);
     vp8_ihtllm_c(b->dqcoeff, b->diff, 32, tx_type, 4);
   } else {
@@ -114,7 +114,7 @@ void vp9_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) {
     BLOCKD  *bd = &xd->block[0];
     tx_type = get_tx_type(xd, bd);
     if (tx_type != DCT_DCT) {
-      vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 16);
+      vp9_fht(b->src_diff, 32, b->coeff, tx_type, 16);
       vp9_quantize_mby_16x16(x);
       if (x->optimize)
         vp9_optimize_mby_16x16(x, rtcd);
@@ -205,7 +205,7 @@ void vp9_encode_intra8x8(const VP8_ENCODER_RTCD *rtcd,
 
     tx_type = get_tx_type(xd, xd->block + idx);
     if (tx_type != DCT_DCT) {
-      vp9_fht_c(be->src_diff, 32, (x->block + idx)->coeff,
-                tx_type, 8);
+      vp9_fht(be->src_diff, 32, (x->block + idx)->coeff,
+              tx_type, 8);
       x->quantize_b_8x8(x->block + idx, xd->block + idx);
       vp8_ihtllm_c(xd->block[idx].dqcoeff, xd->block[ib].diff, 32,
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 18e596efcd90411be804ac0c42128f1a3ab53683..9e11733a6fe8f34a0e9b65e46dcae675c4d442f4 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -833,7 +833,7 @@ static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion,
 
   tx_type = get_tx_type_16x16(xd, b);
   if (tx_type != DCT_DCT) {
-    vp9_fht_c(be->src_diff, 32, be->coeff, tx_type, 16);
+    vp9_fht(be->src_diff, 32, be->coeff, tx_type, 16);
   } else
     vp9_transform_mby_16x16(mb);
 
@@ -1115,7 +1115,7 @@ static int64_t rd_pick_intra4x4block(VP8_COMP *cpi, MACROBLOCK *x, BLOCK *be,
       b->bmi.as_mode.first = mode;
       tx_type = get_tx_type_4x4(xd, b);
       if (tx_type != DCT_DCT) {
-        vp9_fht_c(be->src_diff, 32, be->coeff, tx_type, 4);
+        vp9_fht(be->src_diff, 32, be->coeff, tx_type, 4);
         vp9_ht_quantize_b_4x4(be, b, tx_type);
       } else {
         x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
@@ -1440,7 +1440,7 @@ static int64_t rd_pick_intra8x8block(VP8_COMP *cpi, MACROBLOCK *x, int ib,
       if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) {
         TX_TYPE tx_type = get_tx_type_8x8(xd, b);
         if (tx_type != DCT_DCT)
-          vp9_fht_c(be->src_diff, 32, (x->block + idx)->coeff, tx_type, 8);
+          vp9_fht(be->src_diff, 32, (x->block + idx)->coeff, tx_type, 8);
         else
           x->vp9_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
         x->quantize_b_8x8(x->block + idx, xd->block + idx);