diff --git a/vp8/encoder/arm/csystemdependent.c b/vp8/encoder/arm/csystemdependent.c index 4521bfc3159b75d8ff26cca60145edd16c762675..bfceab16c0f17ce3389553ad10276f84c0d7d10b 100644 --- a/vp8/encoder/arm/csystemdependent.c +++ b/vp8/encoder/arm/csystemdependent.c @@ -63,7 +63,7 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi) cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_neon; cpi->rtcd.quantize.quantb = vp8_regular_quantize_b; - cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_neon; + /*cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_neon;*/ #elif HAVE_ARMV6 cpi->rtcd.variance.sad16x16 = vp8_sad16x16_c; cpi->rtcd.variance.sad16x8 = vp8_sad16x8_c; diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h index b55bc51cb177dcb44e97277c75e7176e1fc362ca..19d307d26f090fb141b3feb8ca4ceaa205a298ca 100644 --- a/vp8/encoder/block.h +++ b/vp8/encoder/block.h @@ -33,6 +33,7 @@ typedef struct // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries short(*quant)[4]; + short(*quant_shift)[4]; short(*zbin)[4]; short(*zrun_zbin_boost); short(*round)[4]; diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c index 32cef1db1e1212726747a629f13b6897a73ef78e..a05b33268dec5d64149ef9aeb9748832e3b54a8d 100644 --- a/vp8/encoder/encodeframe.c +++ b/vp8/encoder/encodeframe.c @@ -103,6 +103,18 @@ static const int qzbin_factors[129] = 80, }; +static void vp8cx_invert_quant(short *quant, short *shift, short d) /* Converts divisor d into a multiplier (*quant) and a right-shift count (*shift); d == 0 would divide by zero below. */ +{ + unsigned t; + int l; + t = d; + for(l = 0; t > 1; l++) /* l counts how many halvings bring t down to 1, i.e. floor(log2(d)) for d >= 1 */ + t>>=1; + t = 1 + (1<<(16+l))/d; /* 1 + 2^(16+l)/d using integer division */ + *quant = (short)(t - (1<<16)); /* keep t minus 2^16, narrowed to short; the quantizers in this patch add x back after (x * quant) >> 16 */ + *shift = l; /* applied as the final right shift by those quantizers */ +} + void vp8cx_init_quantizer(VP8_COMP *cpi) { int r, c; @@ -116,21 +128,24 @@ void vp8cx_init_quantizer(VP8_COMP *cpi) { // dc values quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q); - cpi->Y1quant[Q][0][0] = (1 << 16) / quant_val; + vp8cx_invert_quant(cpi->Y1quant[Q][0] + 0, + cpi->Y1quant_shift[Q][0] + 0, quant_val); cpi->Y1zbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7; cpi->Y1round[Q][0][0] = 
(qrounding_factors[Q] * quant_val) >> 7; cpi->common.Y1dequant[Q][0][0] = quant_val; cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7; quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q); - cpi->Y2quant[Q][0][0] = (1 << 16) / quant_val; + vp8cx_invert_quant(cpi->Y2quant[Q][0] + 0, + cpi->Y2quant_shift[Q][0] + 0, quant_val); cpi->Y2zbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7; cpi->Y2round[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7; cpi->common.Y2dequant[Q][0][0] = quant_val; cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7; quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q); - cpi->UVquant[Q][0][0] = (1 << 16) / quant_val; + vp8cx_invert_quant(cpi->UVquant[Q][0] + 0, + cpi->UVquant_shift[Q][0] + 0, quant_val); cpi->UVzbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;; cpi->UVround[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7; cpi->common.UVdequant[Q][0][0] = quant_val; @@ -144,21 +159,24 @@ void vp8cx_init_quantizer(VP8_COMP *cpi) c = (rc & 3); quant_val = vp8_ac_yquant(Q); - cpi->Y1quant[Q][r][c] = (1 << 16) / quant_val; + vp8cx_invert_quant(cpi->Y1quant[Q][r] + c, + cpi->Y1quant_shift[Q][r] + c, quant_val); cpi->Y1zbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7; cpi->Y1round[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7; cpi->common.Y1dequant[Q][r][c] = quant_val; cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7; quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q); - cpi->Y2quant[Q][r][c] = (1 << 16) / quant_val; + vp8cx_invert_quant(cpi->Y2quant[Q][r] + c, + cpi->Y2quant_shift[Q][r] + c, quant_val); cpi->Y2zbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7; cpi->Y2round[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7; cpi->common.Y2dequant[Q][r][c] = quant_val; cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7; quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q); - cpi->UVquant[Q][r][c] = (1 << 16) / quant_val; + 
vp8cx_invert_quant(cpi->UVquant[Q][r] + c, + cpi->UVquant_shift[Q][r] + c, quant_val); cpi->UVzbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7; cpi->UVround[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7; cpi->common.UVdequant[Q][r][c] = quant_val; @@ -198,6 +216,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x) for (i = 0; i < 16; i++) { x->block[i].quant = cpi->Y1quant[QIndex]; + x->block[i].quant_shift = cpi->Y1quant_shift[QIndex]; x->block[i].zbin = cpi->Y1zbin[QIndex]; x->block[i].round = cpi->Y1round[QIndex]; x->e_mbd.block[i].dequant = cpi->common.Y1dequant[QIndex]; @@ -211,6 +230,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x) for (i = 16; i < 24; i++) { x->block[i].quant = cpi->UVquant[QIndex]; + x->block[i].quant_shift = cpi->UVquant_shift[QIndex]; x->block[i].zbin = cpi->UVzbin[QIndex]; x->block[i].round = cpi->UVround[QIndex]; x->e_mbd.block[i].dequant = cpi->common.UVdequant[QIndex]; @@ -221,6 +241,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x) // Y2 zbin_extra = (cpi->common.Y2dequant[QIndex][0][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7; x->block[24].quant = cpi->Y2quant[QIndex]; + x->block[24].quant_shift = cpi->Y2quant_shift[QIndex]; x->block[24].zbin = cpi->Y2zbin[QIndex]; x->block[24].round = cpi->Y2round[QIndex]; x->e_mbd.block[24].dequant = cpi->common.Y2dequant[QIndex]; diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c index dd98a09d19a048448a66a0249264bad79aa577e8..54646f421bbb79ec7c4cc0dd76c891f69ae87160 100644 --- a/vp8/encoder/ethreading.c +++ b/vp8/encoder/ethreading.c @@ -286,6 +286,7 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc) for (i = 0; i < 25; i++) { z->block[i].quant = x->block[i].quant; + z->block[i].quant_shift = x->block[i].quant_shift; z->block[i].zbin = x->block[i].zbin; z->block[i].zrun_zbin_boost = x->block[i].zrun_zbin_boost; z->block[i].round = x->block[i].round; diff --git a/vp8/encoder/onyx_int.h 
b/vp8/encoder/onyx_int.h index fcde2205db6ee170a1a771b8c4e4b62908c08be8..f76d2efcdf463938165f590f032520441de61138 100644 --- a/vp8/encoder/onyx_int.h +++ b/vp8/encoder/onyx_int.h @@ -234,14 +234,17 @@ typedef struct { DECLARE_ALIGNED(16, short, Y1quant[QINDEX_RANGE][4][4]); + DECLARE_ALIGNED(16, short, Y1quant_shift[QINDEX_RANGE][4][4]); DECLARE_ALIGNED(16, short, Y1zbin[QINDEX_RANGE][4][4]); DECLARE_ALIGNED(16, short, Y1round[QINDEX_RANGE][4][4]); DECLARE_ALIGNED(16, short, Y2quant[QINDEX_RANGE][4][4]); + DECLARE_ALIGNED(16, short, Y2quant_shift[QINDEX_RANGE][4][4]); DECLARE_ALIGNED(16, short, Y2zbin[QINDEX_RANGE][4][4]); DECLARE_ALIGNED(16, short, Y2round[QINDEX_RANGE][4][4]); DECLARE_ALIGNED(16, short, UVquant[QINDEX_RANGE][4][4]); + DECLARE_ALIGNED(16, short, UVquant_shift[QINDEX_RANGE][4][4]); DECLARE_ALIGNED(16, short, UVzbin[QINDEX_RANGE][4][4]); DECLARE_ALIGNED(16, short, UVround[QINDEX_RANGE][4][4]); diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c index 181870c1130a8119e85133c48f0c89a662537ebb..877002b08cf5c60778fe1ee24621e79f0b035734 100644 --- a/vp8/encoder/quantize.c +++ b/vp8/encoder/quantize.c @@ -25,6 +25,7 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d) short *zbin_ptr = &b->zbin[0][0]; short *round_ptr = &b->round[0][0]; short *quant_ptr = &b->quant[0][0]; + short *quant_shift_ptr = &b->quant_shift[0][0]; short *qcoeff_ptr = d->qcoeff; short *dqcoeff_ptr = d->dqcoeff; short *dequant_ptr = &d->dequant[0][0]; @@ -45,7 +46,9 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d) if (x >= zbin) { - y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x) + x += round_ptr[rc]; + y = (((x * quant_ptr[rc]) >> 16) + x) + >> quant_shift_ptr[rc]; // quantize (x) x = (y ^ sz) - sz; // get the sign back qcoeff_ptr[rc] = x; // write to destination dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value @@ -69,6 +72,7 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d) short *zbin_ptr = &b->zbin[0][0]; short *round_ptr = &b->round[0][0]; 
short *quant_ptr = &b->quant[0][0]; + short *quant_shift_ptr = &b->quant_shift[0][0]; short *qcoeff_ptr = d->qcoeff; short *dqcoeff_ptr = d->dqcoeff; short *dequant_ptr = &d->dequant[0][0]; @@ -95,7 +99,9 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d) if (x >= zbin) { - y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x) + x += round_ptr[rc]; + y = (((x * quant_ptr[rc]) >> 16) + x) + >> quant_shift_ptr[rc]; // quantize (x) x = (y ^ sz) - sz; // get the sign back qcoeff_ptr[rc] = x; // write to destination dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c index 11ef4197b4987174bfeed9383c56b1c2263e6af3..be226e040fccd11c1669fd84a9020775e7339738 100644 --- a/vp8/encoder/x86/x86_csystemdependent.c +++ b/vp8/encoder/x86/x86_csystemdependent.c @@ -238,7 +238,7 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) cpi->rtcd.encodemb.submby = vp8_subtract_mby_mmx; cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_mmx; - cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_mmx; + /*cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_mmx;*/ } #endif @@ -285,8 +285,8 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_xmm; /* cpi->rtcd.encodemb.sub* not implemented for wmt */ - cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_sse; - cpi->rtcd.quantize.quantb = vp8_regular_quantize_b_sse2; + /*cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_sse; + cpi->rtcd.quantize.quantb = vp8_regular_quantize_b_sse2;*/ } #endif