diff --git a/vp8/common/idct.h b/vp8/common/idct.h index 5336f5ab0d645265852285a3ef7f1e3b24f306e3..2a410c34ee1f7e3091b2fd177acfea84b1e88e3e 100644 --- a/vp8/common/idct.h +++ b/vp8/common/idct.h @@ -111,9 +111,10 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_lossless_c); extern prototype_second_order(vp8_short_inv_walsh4x4_1_lossless_c); #endif -#if CONFIG_HYBRIDTRANSFORM +#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM #include "vp8/common/blockd.h" -void vp8_iht4x4llm_c(short *input, short *output, int pitch, TX_TYPE tx_type); +void vp8_ihtllm_c(short *input, short *output, int pitch, + TX_TYPE tx_type, int tx_dim); #endif diff --git a/vp8/common/idctllm.c b/vp8/common/idctllm.c index 616e4938ecd970c1f825d0de7b4b5ed2ee159e9c..5c7bf78d2984c22f2b9745004ee7989d761c8d89 100644 --- a/vp8/common/idctllm.c +++ b/vp8/common/idctllm.c @@ -93,120 +93,17 @@ float iadst_8[64] = { }; #endif -#if CONFIG_HYBRIDTRANSFORM -void vp8_iht4x4llm_c(short *input, short *output, int pitch, TX_TYPE tx_type) { - int i, j, k; - float bufa[16], bufb[16]; // buffers are for floating-point test purpose - // the implementation could be simplified in - // conjunction with integer transform - short *ip = input; - short *op = output; - int shortpitch = pitch >> 1; - - float *pfa = &bufa[0]; - float *pfb = &bufb[0]; - - // pointers to vertical and horizontal transforms - float *ptv, *pth; - - // load and convert residual array into floating-point - for(j = 0; j < 4; j++) { - for(i = 0; i < 4; i++) { - pfa[i] = (float)ip[i]; - } - pfa += 4; - ip += 4; - } - - // vertical transformation - pfa = &bufa[0]; - pfb = &bufb[0]; - - switch(tx_type) { - case ADST_ADST : - case ADST_DCT : - ptv = &iadst_4[0]; - break; - - default : - ptv = &idct_4[0]; - break; - } - - for(j = 0; j < 4; j++) { - for(i = 0; i < 4; i++) { - pfb[i] = 0 ; - for(k = 0; k < 4; k++) { - pfb[i] += ptv[k] * pfa[(k<<2)]; - } - pfa += 1; - } - - pfb += 4; - ptv += 4; - pfa = &bufa[0]; - } - - // horizontal transformation - pfa = &bufa[0]; - pfb = &bufb[0]; - - switch(tx_type) { - case ADST_ADST : - case DCT_ADST : - pth = &iadst_4[0]; - break; - - default : - pth = &idct_4[0]; - break; - } - - for(j = 0; j < 4; j++) { - for(i = 0; i < 4; i++) { - pfa[i] = 0; - for(k = 0; k < 4; k++) { - pfa[i] += pfb[k] * pth[k]; - } - pth += 4; - } - - pfa += 4; - pfb += 4; - - switch(tx_type) { - case ADST_ADST : - case DCT_ADST : - pth = &iadst_4[0]; - break; - - default : - pth = &idct_4[0]; - break; - } - } - - // convert to short integer format and load BLOCKD buffer - op = output; - pfa = &bufa[0]; - - for(j = 0; j < 4; j++) { - for(i = 0; i < 4; i++) { - op[i] = (pfa[i] > 0 ) ? (short)( pfa[i] / 8 + 0.49) : - -(short)( - pfa[i] / 8 + 0.49); - } - op += shortpitch; - pfa += 4; - } -} -#endif - -#if CONFIG_HYBRIDTRANSFORM8X8 -void vp8_iht8x8llm_c(short *input, short *output, int pitch, TX_TYPE tx_type) { +#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM +void vp8_ihtllm_c(short *input, short *output, int pitch, + TX_TYPE tx_type, int tx_dim) { int i, j, k; float bufa[64], bufb[64]; // buffers are for floating-point test purpose // the implementation could be simplified in // conjunction with integer transform + + // further notice, since we are thinking to use one + // function for both 4x4 and 8x8 transforms, the + // temporary buffers are simply initialized with 64. short *ip = input; short *op = output; int shortpitch = pitch >> 1; @@ -218,12 +115,12 @@ void vp8_iht8x8llm_c(short *input, short *output, int pitch, TX_TYPE tx_type) { float *ptv, *pth; // load and convert residual array into floating-point - for(j = 0; j < 8; j++) { - for(i = 0; i < 8; i++) { + for(j = 0; j < tx_dim; j++) { + for(i = 0; i < tx_dim; i++) { pfa[i] = (float)ip[i]; } - pfa += 8; - ip += 8; + pfa += tx_dim; + ip += tx_dim; } // vertical transformation @@ -233,25 +130,25 @@ void vp8_iht8x8llm_c(short *input, short *output, int pitch, TX_TYPE tx_type) { switch(tx_type) { case ADST_ADST : case ADST_DCT : - ptv = &iadst_8[0]; + ptv = (tx_dim == 4) ? &iadst_4[0] : &iadst_8[0]; break; default : - ptv = &idct_8[0]; + ptv = (tx_dim == 4) ? &idct_4[0] : &idct_8[0]; break; } - for(j = 0; j < 8; j++) { - for(i = 0; i < 8; i++) { + for(j = 0; j < tx_dim; j++) { + for(i = 0; i < tx_dim; i++) { pfb[i] = 0 ; - for(k = 0; k < 8; k++) { - pfb[i] += ptv[k] * pfa[(k<<3)]; + for(k = 0; k < tx_dim; k++) { + pfb[i] += ptv[k] * pfa[(k * tx_dim)]; } pfa += 1; } - pfb += 8; - ptv += 8; + pfb += tx_dim; + ptv += tx_dim; pfa = &bufa[0]; } @@ -262,34 +159,34 @@ void vp8_iht8x8llm_c(short *input, short *output, int pitch, TX_TYPE tx_type) { switch(tx_type) { case ADST_ADST : case DCT_ADST : - pth = &iadst_8[0]; + pth = (tx_dim == 4) ? &iadst_4[0] : &iadst_8[0]; break; default : - pth = &idct_8[0]; + pth = (tx_dim == 4) ? &idct_4[0] : &idct_8[0]; break; } - for(j = 0; j < 8; j++) { - for(i = 0; i < 8; i++) { + for(j = 0; j < tx_dim; j++) { + for(i = 0; i < tx_dim; i++) { pfa[i] = 0; - for(k = 0; k < 8; k++) { + for(k = 0; k < tx_dim; k++) { pfa[i] += pfb[k] * pth[k]; } - pth += 8; + pth += tx_dim; } - pfa += 8; - pfb += 8; + pfa += tx_dim; + pfb += tx_dim; switch(tx_type) { case ADST_ADST : case DCT_ADST : - pth = &iadst_8[0]; + pth = (tx_dim == 4) ? &iadst_4[0] : &iadst_8[0]; break; default : - pth = &idct_8[0]; + pth = (tx_dim == 4) ? &idct_4[0] : &idct_8[0]; break; } } @@ -298,13 +195,14 @@ void vp8_iht8x8llm_c(short *input, short *output, int pitch, TX_TYPE tx_type) { op = output; pfa = &bufa[0]; - for(j = 0; j < 8; j++) { - for(i = 0; i < 8; i++) { + for(j = 0; j < tx_dim; j++) { + for(i = 0; i < tx_dim; i++) { op[i] = (pfa[i] > 0 ) ? (short)( pfa[i] / 8 + 0.49) : -(short)( - pfa[i] / 8 + 0.49); } + op += shortpitch; - pfa += 8; + pfa += tx_dim; } } #endif diff --git a/vp8/common/invtrans.c b/vp8/common/invtrans.c index de9aad58d96358865aa90f068eff5b70b7c18a7d..1357839c791a743f1038e91eec13bdc6e7f2f686 100644 --- a/vp8/common/invtrans.c +++ b/vp8/common/invtrans.c @@ -33,7 +33,7 @@ static void recon_dcblock_8x8(MACROBLOCKD *x) { #if CONFIG_HYBRIDTRANSFORM void vp8_inverse_htransform_b(const vp8_idct_rtcd_vtable_t *rtcd, BLOCKD *b, int pitch) { - vp8_iht4x4llm_c(b->dqcoeff, b->diff, pitch, b->bmi.as_mode.tx_type); + vp8_ihtllm_c(b->dqcoeff, b->diff, pitch, b->bmi.as_mode.tx_type, 4); } #endif diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c index 057104f497de828b1985c35bce56698164210854..59f453edff0547a8bb36b705f12307da2074b89a 100644 --- a/vp8/decoder/decodframe.c +++ b/vp8/decoder/decodframe.c @@ -392,7 +392,6 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, txfm_map(b, pred_mode_conv(i8x8mode)); vp8_ht_dequant_idct_add_8x8_c(b->bmi.as_mode.tx_type, q, dq, pre, dst, 16, stride); - // vp8_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride); q += 64; #else for (j = 0; j < 4; j++) { diff --git a/vp8/decoder/dequantize.c b/vp8/decoder/dequantize.c index bf44fd61ac109dd8c310a76170ffc14238b7162b..6164c44d534130725b556f2261bf0a927d4f00b5 100644 --- a/vp8/decoder/dequantize.c +++ b/vp8/decoder/dequantize.c @@ -55,7 +55,7 @@ void vp8_ht_dequant_idct_add_c(TX_TYPE tx_type, short *input, short *dq, input[i] = dq[i] * input[i]; } - vp8_iht4x4llm_c( input, output, 4 << 1, tx_type ); + vp8_ihtllm_c(input, output, 4 << 1, tx_type, 4); vpx_memset(input, 0, 32); @@ -95,7 +95,7 @@ void vp8_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, short *input, short *dq, input[i] = dq[1] * input[i]; } - vp8_iht8x8llm_c(input, output, 16, tx_type); + vp8_ihtllm_c(input, output, 16, tx_type, 8); vpx_memset(input, 0, 128); @@ -117,9 +117,10 @@ void vp8_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, short *input, short *dq, diff_ptr += 8; pred += pitch; } - diff_ptr = output + (b + 1) / 2 * 4 * 8 + (b + 1) % 2 * 4; - dest = origdest + (b + 1) / 2 * 4 * stride + (b + 1) % 2 * 4; - pred = origpred + (b + 1) / 2 * 4 * pitch + (b + 1) % 2 * 4; + // shift buffer pointers to next 4x4 block in the submacroblock + diff_ptr = output + (b + 1) / 2 * 4 * 8 + ((b + 1) % 2) * 4; + dest = origdest + (b + 1) / 2 * 4 * stride + ((b + 1) % 2) * 4; + pred = origpred + (b + 1) / 2 * 4 * pitch + ((b + 1) % 2) * 4; } } #endif diff --git a/vp8/encoder/dct.c b/vp8/encoder/dct.c index ad525855265f723df2e3fadfd0541bc528e181ea..ae1912903c951e8942b8c0e6e4cdef74e513db63 100644 --- a/vp8/encoder/dct.c +++ b/vp8/encoder/dct.c @@ -329,114 +329,9 @@ void vp8_short_fhaar2x2_c(short *input, short *output, int pitch) { // pitch = 8 } -#if CONFIG_HYBRIDTRANSFORM -void vp8_fht4x4_c(short *input, short *output, int pitch, TX_TYPE tx_type) { - int i, j, k; - float bufa[16], bufb[16]; // buffers are for floating-point test purpose - // the implementation could be simplified in - // conjunction with integer transform - short *ip = input; - short *op = output; - - float *pfa = &bufa[0]; - float *pfb = &bufb[0]; - - // pointers to vertical and horizontal transforms - float *ptv, *pth; - - // load and convert residual array into floating-point - for(j = 0; j < 4; j++) { - for(i = 0; i < 4; i++) { - pfa[i] = (float)ip[i]; - } - pfa += 4; - ip += pitch / 2; - } - - // vertical transformation - pfa = &bufa[0]; - pfb = &bufb[0]; - - switch(tx_type) { - case ADST_ADST : - case ADST_DCT : - ptv = &adst_4[0]; - break; - - default : - ptv = &dct_4[0]; - break; - } - - for(j = 0; j < 4; j++) { - for(i = 0; i < 4; i++) { - pfb[i] = 0; - for(k = 0; k < 4; k++) { - pfb[i] += ptv[k] * pfa[(k<<2)]; - } - pfa += 1; - } - pfb += 4; - ptv += 4; - pfa = &bufa[0]; - } - - // horizontal transformation - pfa = &bufa[0]; - pfb = &bufb[0]; - - switch(tx_type) { - case ADST_ADST : - case DCT_ADST : - pth = &adst_4[0]; - break; - - default : - pth = &dct_4[0]; - break; - } - - for(j = 0; j < 4; j++) { - for(i = 0; i < 4; i++) { - pfa[i] = 0; - for(k = 0; k < 4; k++) { - pfa[i] += pfb[k] * pth[k]; - } - pth += 4; - } - - pfa += 4; - pfb += 4; - - switch(tx_type) { - case ADST_ADST : - case DCT_ADST : - pth = &adst_4[0]; - break; - - default : - pth = &dct_4[0]; - break; - } - } - - // convert to short integer format and load BLOCKD buffer - op = output ; - pfa = &bufa[0] ; - - for(j = 0; j < 4; j++) { - for(i = 0; i < 4; i++) { - op[i] = (pfa[i] > 0 ) ? (short)( 8 * pfa[i] + 0.49) : - -(short)(- 8 * pfa[i] + 0.49); - } - op += 4; - pfa += 4; - } -} -#endif - -#if CONFIG_HYBRIDTRANSFORM8X8 -void vp8_fht8x8_c(short *input, short *output, int pitch, TX_TYPE tx_type) { +#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM +void vp8_fht_c(short *input, short *output, int pitch, + TX_TYPE tx_type, int tx_dim) { int i, j, k; float bufa[64], bufb[64]; // buffers are for floating-point test purpose // the implementation could be simplified in @@ -451,11 +346,11 @@ void vp8_fht8x8_c(short *input, short *output, int pitch, TX_TYPE tx_type) { float *ptv, *pth; // load and convert residual array into floating-point - for(j = 0; j < 8; j++) { - for(i = 0; i < 8; i++) { + for(j = 0; j < tx_dim; j++) { + for(i = 0; i < tx_dim; i++) { pfa[i] = (float)ip[i]; } - pfa += 8; + pfa += tx_dim; ip += pitch / 2; } @@ -466,24 +361,24 @@ void vp8_fht8x8_c(short *input, short *output, int pitch, TX_TYPE tx_type) { switch(tx_type) { case ADST_ADST : case ADST_DCT : - ptv = &adst_8[0]; + ptv = (tx_dim == 4) ? &adst_4[0] : &adst_8[0]; break; default : - ptv = &dct_8[0]; + ptv = (tx_dim == 4) ? &dct_4[0] : &dct_8[0]; break; } - for(j = 0; j < 8; j++) { - for(i = 0; i < 8; i++) { + for(j = 0; j < tx_dim; j++) { + for(i = 0; i < tx_dim; i++) { pfb[i] = 0; - for(k = 0; k < 8; k++) { - pfb[i] += ptv[k] * pfa[(k<<3)]; + for(k = 0; k < tx_dim; k++) { + pfb[i] += ptv[k] * pfa[(k * tx_dim)]; } pfa += 1; } - pfb += 8; - ptv += 8; + pfb += tx_dim; + ptv += tx_dim; pfa = &bufa[0]; } @@ -494,34 +389,34 @@ void vp8_fht8x8_c(short *input, short *output, int pitch, TX_TYPE tx_type) { switch(tx_type) { case ADST_ADST : case DCT_ADST : - pth = &adst_8[0]; + pth = (tx_dim == 4) ? &adst_4[0] : &adst_8[0]; break; default : - pth = &dct_8[0]; + pth = (tx_dim == 4) ? &dct_4[0] : &dct_8[0]; break; } - for(j = 0; j < 8; j++) { - for(i = 0; i < 8; i++) { + for(j = 0; j < tx_dim; j++) { + for(i = 0; i < tx_dim; i++) { pfa[i] = 0; - for(k = 0; k < 8; k++) { + for(k = 0; k < tx_dim; k++) { pfa[i] += pfb[k] * pth[k]; } - pth += 8; + pth += tx_dim; } - pfa += 8; - pfb += 8; + pfa += tx_dim; + pfb += tx_dim; switch(tx_type) { case ADST_ADST : case DCT_ADST : - pth = &adst_8[0]; + pth = (tx_dim == 4) ? &adst_4[0] : &adst_8[0]; break; default : - pth = &dct_8[0]; + pth = (tx_dim == 4) ? &dct_4[0] : &dct_8[0]; break; } } @@ -530,13 +425,13 @@ void vp8_fht8x8_c(short *input, short *output, int pitch, TX_TYPE tx_type) { op = output ; pfa = &bufa[0] ; - for(j = 0; j < 8; j++) { - for(i = 0; i < 8; i++) { + for(j = 0; j < tx_dim; j++) { + for(i = 0; i < tx_dim; i++) { op[i] = (pfa[i] > 0 ) ? (short)( 8 * pfa[i] + 0.49) : -(short)(- 8 * pfa[i] + 0.49); } - op += 8; - pfa += 8; + op += tx_dim; + pfa += tx_dim; } } #endif @@ -582,14 +477,6 @@ void vp8_short_fdct4x4_c(short *input, short *output, int pitch) { } } -#if CONFIG_HYBRIDTRANSFORM -void vp8_fht8x4_c(short *input, short *output, int pitch, - TX_TYPE tx_type) { - vp8_fht4x4_c(input, output, pitch, tx_type); - vp8_fht4x4_c(input + 4, output + 16, pitch, tx_type); -} -#endif - void vp8_short_fdct8x4_c(short *input, short *output, int pitch) { vp8_short_fdct4x4_c(input, output, pitch); diff --git a/vp8/encoder/dct.h b/vp8/encoder/dct.h index 9936969d5a42c23a8051cb6b3cc54c38e84141d3..2d7b61754585200d552a53872595571db3243db3 100644 --- a/vp8/encoder/dct.h +++ b/vp8/encoder/dct.h @@ -23,9 +23,9 @@ #endif -#if CONFIG_HYBRIDTRANSFORM -void vp8_fht4x4_c(short *input, short *output, int pitch, TX_TYPE tx_type); -void vp8_fht8x4_c(short *input, short *output, int pitch, TX_TYPE tx_type); +#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM +void vp8_fht_c(short *input, short *output, int pitch, + TX_TYPE tx_type, int tx_dim); #endif #if CONFIG_TX16X16 diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c index 964046d926f3646ac6a6895c2f8201abdbf263db..c4049016362727543c09eb9a1378a517a1c8aed5 100644 --- a/vp8/encoder/encodeintra.c +++ b/vp8/encoder/encodeintra.c @@ -91,8 +91,7 @@ void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *rtcd, if(active_ht) { b->bmi.as_mode.test = b->bmi.as_mode.first; txfm_map(b, b->bmi.as_mode.first); - - vp8_fht4x4_c(be->src_diff, be->coeff, 32, b->bmi.as_mode.tx_type); + vp8_fht_c(be->src_diff, be->coeff, 32, b->bmi.as_mode.tx_type, 4); vp8_ht_quantize_b(be, b); vp8_inverse_htransform_b(IF_RTCD(&rtcd->common->idct), b, 32) ; } else { @@ -317,16 +316,11 @@ void vp8_encode_intra8x8(const VP8_ENCODER_RTCD *rtcd, vp8_subtract_4b_c(be, b, 16); txfm_map(b, pred_mode_conv(b->bmi.as_mode.first)); - - vp8_fht8x8_c(be->src_diff, (x->block + idx)->coeff, 32, - b->bmi.as_mode.tx_type); + vp8_fht_c(be->src_diff, (x->block + idx)->coeff, 32, + b->bmi.as_mode.tx_type, 8); x->quantize_b_8x8(x->block + idx, xd->block + idx); - vp8_iht8x8llm_c(xd->block[idx].dqcoeff, xd->block[ib].diff, 32, - b->bmi.as_mode.tx_type); - -// x->vp8_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32); -// x->quantize_b_8x8(x->block + idx, xd->block + idx); -// vp8_short_idct8x8_c(xd->block[idx].dqcoeff, xd->block[ib].diff, 32); + vp8_ihtllm_c(xd->block[idx].dqcoeff, xd->block[ib].diff, 32, + b->bmi.as_mode.tx_type, 8); // reconstruct submacroblock for (i = 0; i < 4; i++) { diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c index 3f2b8e85c6870a5a8743515d4a005e544529795f..67bf33d6fdbbdba21bdbe2b338193bd57df55abf 100644 --- a/vp8/encoder/rdopt.c +++ b/vp8/encoder/rdopt.c @@ -612,20 +612,20 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, if((type == PLANE_TYPE_Y_WITH_DC) && active_ht) { switch (b->bmi.as_mode.tx_type) { case ADST_DCT: - pt_scan = vp8_row_scan; + scan = vp8_row_scan; break; case DCT_ADST: - pt_scan = vp8_col_scan; + scan = vp8_col_scan; break; default: - pt_scan = vp8_default_zig_zag1d; + scan = vp8_default_zig_zag1d; break; } } else - pt_scan = vp8_default_zig_zag1d; + scan = vp8_default_zig_zag1d; } #endif break; @@ -937,8 +937,7 @@ static int64_t rd_pick_intra4x4block( if(active_ht) { b->bmi.as_mode.test = mode; txfm_map(b, mode); - - vp8_fht4x4_c(be->src_diff, be->coeff, 32, b->bmi.as_mode.tx_type); + vp8_fht_c(be->src_diff, be->coeff, 32, b->bmi.as_mode.tx_type, 4); vp8_ht_quantize_b(be, b); } else { x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32); @@ -991,7 +990,7 @@ static int64_t rd_pick_intra4x4block( // inverse transform if(active_ht) { - vp8_iht4x4llm_c(best_dqcoeff, b->diff, 32, b->bmi.as_mode.tx_type ); + vp8_ihtllm_c(best_dqcoeff, b->diff, 32, b->bmi.as_mode.tx_type, 4); } else { IDCT_INVOKE(IF_RTCD(&cpi->rtcd.common->idct), idct16)(best_dqcoeff, b->diff, 32); @@ -1230,8 +1229,8 @@ static int64_t rd_pick_intra8x8block( #if CONFIG_HYBRIDTRANSFORM8X8 txfm_map(b, pred_mode_conv(mode)); - vp8_fht8x8_c(be->src_diff, (x->block + idx)->coeff, 32, b->bmi.as_mode.tx_type); -// x->vp8_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32); + vp8_fht_c(be->src_diff, (x->block + idx)->coeff, 32, + b->bmi.as_mode.tx_type, 8); x->quantize_b_8x8(x->block + idx, xd->block + idx); // compute quantization mse of 8x8 block