From abff678866f9f74c58616705b01d460efb994fe2 Mon Sep 17 00:00:00 2001 From: Jingning Han <jingning@google.com> Date: Mon, 26 Aug 2013 16:12:16 -0700 Subject: [PATCH] Fix overflow issue in SSSE3 32x32 quantization The 32x32 quantization process can potentially produce intermediate values that exceed the 16-bit range, thereby causing enc/dec mismatch. This commit fixes this overflow issue in the SSSE3 implementation, as well as the prototype, of 32x32 quantization. This fixes issue 607 from webm@googlecode. Change-Id: I85635e6ca236b90c3dcfc40d449215c7b9caa806 --- vp9/common/vp9_rtcd_defs.sh | 2 +- vp9/encoder/vp9_quantize.c | 13 ++++++------- vp9/encoder/x86/vp9_quantize_ssse3.asm | 20 ++++++++++++-------- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 2979daf6d1..d075443edc 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -701,7 +701,7 @@ prototype void vp9_quantize_b "int16_t *coeff_ptr, intptr_t n_coeffs, int skip_b specialize vp9_quantize_b $ssse3_x86_64 prototype void vp9_quantize_b_32x32 "int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, int16_t *zbin_ptr, int16_t *round_ptr, int16_t *quant_ptr, int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan" -specialize vp9_quantize_b_32x32 +specialize vp9_quantize_b_32x32 $ssse3_x86_64 # # Structured Similarity (SSIM) diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c index 02c0685523..fb0e4707ac 100644 --- a/vp9/encoder/vp9_quantize.c +++ b/vp9/encoder/vp9_quantize.c @@ -84,7 +84,6 @@ void vp9_quantize_b_c(int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, *eob_ptr = eob + 1; } -// This function works well for large transform size. 
void vp9_quantize_b_32x32_c(int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, int16_t *zbin_ptr, int16_t *round_ptr, @@ -105,8 +104,8 @@ void vp9_quantize_b_32x32_c(int16_t *coeff_ptr, intptr_t n_coeffs, eob = -1; // Base ZBIN - zbins[0] = zbin_ptr[0] + zbin_oq_value; - zbins[1] = zbin_ptr[1] + zbin_oq_value; + zbins[0] = ROUND_POWER_OF_TWO(zbin_ptr[0] + zbin_oq_value, 1); + zbins[1] = ROUND_POWER_OF_TWO(zbin_ptr[1] + zbin_oq_value, 1); nzbins[0] = zbins[0] * -1; nzbins[1] = zbins[1] * -1; @@ -114,7 +113,7 @@ void vp9_quantize_b_32x32_c(int16_t *coeff_ptr, intptr_t n_coeffs, // Pre-scan pass for (i = 0; i < n_coeffs; i++) { rc = scan[i]; - z = coeff_ptr[rc] * 2; + z = coeff_ptr[rc]; // If the coefficient is out of the base ZBIN range, keep it for // quantization. @@ -130,14 +129,14 @@ void vp9_quantize_b_32x32_c(int16_t *coeff_ptr, intptr_t n_coeffs, // Calculate ZBIN zbin = (zbins[rc != 0]); - z = coeff_ptr[rc] * 2; + z = coeff_ptr[rc]; sz = (z >> 31); // sign of z x = (z ^ sz) - sz; // x = abs(z) if (x >= zbin) { - x += (round_ptr[rc != 0]); + x += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1); y = (((int)(((int)(x * quant_ptr[rc != 0]) >> 16) + x)) * - quant_shift_ptr[rc != 0]) >> 16; // quantize (x) + quant_shift_ptr[rc != 0]) >> 15; // quantize (x) x = (y ^ sz) - sz; // get the sign back qcoeff_ptr[rc] = x; // write to destination diff --git a/vp9/encoder/x86/vp9_quantize_ssse3.asm b/vp9/encoder/x86/vp9_quantize_ssse3.asm index 60f7991955..7deb9815a3 100644 --- a/vp9/encoder/x86/vp9_quantize_ssse3.asm +++ b/vp9/encoder/x86/vp9_quantize_ssse3.asm @@ -36,6 +36,14 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ pshufd m4, m4, 0 mova m2, [quantq] ; m2 = quant paddw m0, m4 ; m0 = zbin + zbin_oq +%ifidn %1, b_32x32 + pcmpeqw m5, m5 + psrlw m5, 15 + paddw m0, m5 + paddw m1, m5 + psrlw m0, 1 ; m0 = (m0 + 1) / 2 + psrlw m1, 1 ; m1 = (m1 + 1) / 2 +%endif mova m3, [r2q] ; m3 = dequant psubw m0, [pw_1] mov r2, shiftmp @@ -43,6 +51,9 @@ 
cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ mova m4, [r2] ; m4 = shift mov r4, dqcoeffmp mov r5, iscanmp +%ifidn %1, b_32x32 + psllw m4, 1 +%endif pxor m5, m5 ; m5 = dedicated zero DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, d6, eob lea coeffq, [ coeffq+ncoeffq*2] @@ -56,10 +67,6 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i] pabsw m6, m9 ; m6 = abs(m9) pabsw m11, m10 ; m11 = abs(m10) -%ifidn %1, b_32x32 - paddw m6, m6 - paddw m11, m11 -%endif pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin punpckhqdq m0, m0 pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin @@ -112,10 +119,6 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i] pabsw m6, m9 ; m6 = abs(m9) pabsw m11, m10 ; m11 = abs(m10) -%ifidn %1, b_32x32 - paddw m6, m6 - paddw m11, m11 -%endif pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin %ifidn %1, b_32x32 @@ -164,6 +167,7 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ pmaxsw m8, m13 add ncoeffq, mmsize jl .ac_only_loop + %ifidn %1, b_32x32 jmp .accumulate_eob .skip_iter: -- GitLab