diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h index 5012edd6167598e50f9b3006ca271f73feb66c90..a90c1c0b653612e0e9a6728ec6a53fdb0aebdded 100644 --- a/vp8/common/blockd.h +++ b/vp8/common/blockd.h @@ -187,7 +187,7 @@ typedef struct int dst; int dst_stride; - int eob; + char *eob; union b_mode_info bmi; } BLOCKD; diff --git a/vp8/common/invtrans.c b/vp8/common/invtrans.c index 7712b59b727a13efa1e9d651afbf9bbf5bbbd693..090c19ccac8d3aab38d2810cab1c41f9c7673055 100644 --- a/vp8/common/invtrans.c +++ b/vp8/common/invtrans.c @@ -15,7 +15,7 @@ void vp8_inverse_transform_b(const vp8_idct_rtcd_vtable_t *rtcd, BLOCKD *b, int pitch) { - if (b->eob > 1) + if (*b->eob > 1) { IDCT_INVOKE(rtcd, idct16)(b->dqcoeff, b->predictor, pitch, *(b->base_dst) + b->dst, b->dst_stride); @@ -65,6 +65,3 @@ void vp8_inverse_transform_mbuv(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD } } - - - diff --git a/vp8/common/mbpitch.c b/vp8/common/mbpitch.c index 054042c0b3ee1f504b6922ad3ad16a4dfe8456bd..11fa3ffa7835e1770833a34bbcc19f1f50f9dd37 100644 --- a/vp8/common/mbpitch.c +++ b/vp8/common/mbpitch.c @@ -118,6 +118,7 @@ void vp8_setup_block_dptrs(MACROBLOCKD *x) { x->block[r].qcoeff = x->qcoeff + r * 16; x->block[r].dqcoeff = x->dqcoeff + r * 16; + x->block[r].eob = x->eobs + r; } } diff --git a/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm b/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm index ae2f6030dc527b0e5c42bace52527aaac043125b..d61f5d94d8dc2b241d016e57d22a2db4380bb6d3 100644 --- a/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm +++ b/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm @@ -102,9 +102,10 @@ loop bne loop ; PART 2: check position for eob... + ldr r11, [sp, #0] ; restore BLOCKD pointer mov lr, #0 ; init eob cmp r1, #0 ; coeffs after quantization? - ldr r11, [sp, #0] ; restore BLOCKD pointer + ldr r12, [r11, #vp8_blockd_eob] beq end ; skip eob calculations if all zero ldr r0, [r11, #vp8_blockd_qcoeff] @@ -212,7 +213,7 @@ quant_coeff_1_0 mov lr, #1 ; rc=0, i=0 end - str lr, [r11, #vp8_blockd_eob] + strb lr, [r12] ldmfd sp!, {r1, r4-r11, pc} ENDP diff --git a/vp8/encoder/arm/neon/fastquantizeb_neon.asm b/vp8/encoder/arm/neon/fastquantizeb_neon.asm index dcf3c5090912f8d5567a56f6ede717ef9bdd4b87..c970cb73e240c0abf27bdc13d4d282c2df58624d 100644 --- a/vp8/encoder/arm/neon/fastquantizeb_neon.asm +++ b/vp8/encoder/arm/neon/fastquantizeb_neon.asm @@ -135,17 +135,16 @@ vmovl.u16 q0, d0 vmovl.u16 q10, d20 - vmax.u32 d0, d0, d1 vmax.u32 d20, d20, d21 vpmax.u32 d0, d0, d0 vpmax.u32 d20, d20, d20 - add r4, r2, #vp8_blockd_eob - add r5, r3, #vp8_blockd_eob + ldr r4, [r2, #vp8_blockd_eob] + ldr r5, [r3, #vp8_blockd_eob] - vst1.32 {d0[0]}, [r4@32] - vst1.32 {d20[0]}, [r5@32] + vst1.8 {d0[0]}, [r4] ; store eob + vst1.8 {d20[0]}, [r5] ; store eob vldmia sp!, {q4-q7} ldmfd sp!, {r4-r9} @@ -196,6 +195,8 @@ vshr.s16 q12, #1 ; right shift 1 after vqdmulh vshr.s16 q13, #1 + ldr r5, [r1, #vp8_blockd_eob] + orr r2, r2, r3 ; check if all zero (step 4) cmp r2, #0 ; check if all zero (step 5) beq zero_output ; check if all zero (step 6) @@ -230,14 +231,13 @@ vst1.s16 {q2, q3}, [r7@128] ; store dqcoeff = x * Dequant - add r4, r1, #vp8_blockd_eob - vst1.32 {d0[0]}, [r4@32] + vst1.8 {d0[0]}, [r5] ; store eob ldmfd sp!, {r4-r7} bx lr zero_output - str r2, [r1, #vp8_blockd_eob] + strb r2, [r5] ; store eob vst1.s16 {q0, q1}, [r6@128] ; qcoeff = 0 vst1.s16 {q0, q1}, [r7@128] ; dqcoeff = 0 diff --git a/vp8/encoder/arm/quantize_arm.c b/vp8/encoder/arm/quantize_arm.c index 52d84013e8c130d65b064fc11fe86ec491d62393..5b3a0275fe69b6d56d89ef2713748fddf4137db7 100644 --- a/vp8/encoder/arm/quantize_arm.c +++ b/vp8/encoder/arm/quantize_arm.c @@ -46,7 +46,7 @@ void vp8_quantize_mb_neon(MACROBLOCK *x) &x->e_mbd.block[i], &x->e_mbd.block[i+1]); if (has_2nd_order) - x->quantize_b(&x->block[i], &x->e_mbd.block[i]); + x->quantize_b(&x->block[24], &x->e_mbd.block[24]); } diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c index 16f6875b1bdcfaca335224d2b88e37cdd66dc722..faa1a8e3300a7a58af5ec7a4f6a5f2e93530f397 100644 --- a/vp8/encoder/encodemb.c +++ b/vp8/encoder/encodemb.c @@ -274,7 +274,7 @@ static void optimize_b(MACROBLOCK *mb, int ib, int type, qcoeff_ptr = d->qcoeff; dqcoeff_ptr = d->dqcoeff; i0 = !type; - eob = d->eob; + eob = *d->eob; /* Now set up a Viterbi trellis to evaluate alternative roundings. */ rdmult = mb->rdmult * err_mult; @@ -466,8 +466,8 @@ static void optimize_b(MACROBLOCK *mb, int ib, int type, } final_eob++; - d->eob = final_eob; - *a = *l = (d->eob != !type); + *a = *l = (final_eob != !type); + *d->eob = (char)final_eob; } static void check_reset_2nd_coeffs(MACROBLOCKD *x, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) @@ -650,7 +650,7 @@ static void inverse_transform_mb(const vp8_idct_rtcd_vtable_t *rtcd, { BLOCKD *b = &x->block[i]; - if (b->eob > 1) + if (*b->eob > 1) { IDCT_INVOKE(rtcd, idct16)(b->dqcoeff, b->predictor, 16, *(b->base_dst) + b->dst, b->dst_stride); @@ -667,7 +667,7 @@ static void inverse_transform_mb(const vp8_idct_rtcd_vtable_t *rtcd, { BLOCKD *b = &x->block[i]; - if (b->eob > 1) + if (*b->eob > 1) { IDCT_INVOKE(rtcd, idct16)(b->dqcoeff, b->predictor, 8, *(b->base_dst) + b->dst, b->dst_stride); diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c index 503d241238d2ee09cc0ada17fae74c031a45552b..22cbbee85b8f322827e351ee37a87992bcad0f8e 100644 --- a/vp8/encoder/quantize.c +++ b/vp8/encoder/quantize.c @@ -62,7 +62,7 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d) } } } - d->eob = eob + 1; + *d->eob = (char)(eob + 1); } #else @@ -97,7 +97,7 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d) eob = i; // last nonzero coeffs } } - d->eob = eob + 1; + *d->eob = (char)(eob + 1); } #endif @@ -152,7 +152,7 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d) } } - d->eob = eob + 1; + *d->eob = (char)(eob + 1); } /* Perform regular quantization, with unbiased rounding and no zero bin. */ @@ -210,7 +210,7 @@ void vp8_strict_quantize_b(BLOCK *b, BLOCKD *d) } } - d->eob = eob + 1; + *d->eob = (char)(eob + 1); } #else @@ -264,7 +264,7 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d) } } - d->eob = eob + 1; + *d->eob = (char)(eob + 1); } #endif @@ -731,4 +731,3 @@ void vp8_set_quantizer(struct VP8_COMP *cpi, int Q) vp8cx_init_quantizer(cpi); } - diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c index fdb519c19951e3f2cc8a29561fde89860b87e828..7950960de57a68f9e5d81b97060756f45b69d4d6 100644 --- a/vp8/encoder/rdopt.c +++ b/vp8/encoder/rdopt.c @@ -485,7 +485,7 @@ int VP8_UVSSE(MACROBLOCK *x, const vp8_variance_rtcd_vtable_t *rtcd) static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) { int c = !type; /* start at coef 0, unless Y with Y2 */ - int eob = b->eob; + int eob = (int)(*b->eob); int pt ; /* surrounding block/prev coef predictor */ int cost = 0; short *qcoeff_ptr = b->qcoeff; @@ -1299,11 +1299,9 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, // store everything needed to come back to this!! for (i = 0; i < 16; i++) { - BLOCKD *bd = &x->e_mbd.block[i]; - bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv; bsi->modes[i] = x->partition_info->bmi[i].mode; - bsi->eobs[i] = bd->eob; + bsi->eobs[i] = x->e_mbd.eobs[i]; } } } @@ -1432,7 +1430,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, BLOCKD *bd = &x->e_mbd.block[i]; bd->bmi.mv.as_int = bsi.mvs[i].as_int; - bd->eob = bsi.eobs[i]; + *bd->eob = bsi.eobs[i]; } *returntotrate = bsi.r; @@ -2271,7 +2269,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int for (i = 0; i <= 24; i++) { - tteob += x->e_mbd.block[i].eob; + tteob += x->e_mbd.eobs[i]; } if (tteob == 0) diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c index c8db4f0672a7873d3a1b43588a169bf54eed232c..e819485676d75b305552a42922b4bde170e72c19 100644 --- a/vp8/encoder/tokenize.c +++ b/vp8/encoder/tokenize.c @@ -108,15 +108,16 @@ static void tokenize2nd_order_b ENTROPY_CONTEXT * a; ENTROPY_CONTEXT * l; int band, rc, v, token; + int eob; b = x->block + 24; qcoeff_ptr = b->qcoeff; a = (ENTROPY_CONTEXT *)x->above_context + 8; l = (ENTROPY_CONTEXT *)x->left_context + 8; - + eob = x->eobs[24]; VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); - if(!b->eob) + if(!eob) { /* c = band for this case */ t->Token = DCT_EOB_TOKEN; @@ -142,7 +143,7 @@ static void tokenize2nd_order_b t++; c = 1; - for (; c < b->eob; c++) + for (; c < eob; c++) { rc = vp8_default_zig_zag1d[c]; band = vp8_coef_bands[c]; @@ -213,7 +214,7 @@ static void tokenize1st_order_b c = type ? 0 : 1; - if(c >= b->eob) + if(c >= *b->eob) { /* c = band for this case */ t->Token = DCT_EOB_TOKEN; @@ -240,7 +241,7 @@ static void tokenize1st_order_b t++; c++; - for (; c < b->eob; c++) + for (; c < *b->eob; c++) { rc = vp8_default_zig_zag1d[c]; band = vp8_coef_bands[c]; @@ -284,7 +285,7 @@ static void tokenize1st_order_b VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); - if(!b->eob) + if(!(*b->eob)) { /* c = band for this case */ t->Token = DCT_EOB_TOKEN; @@ -311,7 +312,7 @@ static void tokenize1st_order_b t++; c = 1; - for (; c < b->eob; c++) + for (; c < *b->eob; c++) { rc = vp8_default_zig_zag1d[c]; band = vp8_coef_bands[c]; @@ -356,11 +357,11 @@ static int mb_is_skippable(MACROBLOCKD *x, int has_y2_block) if (has_y2_block) { for (i = 0; i < 16; i++) - skip &= (x->block[i].eob < 2); + skip &= (x->eobs[i] < 2); } for (; i < 24 + has_y2_block; i++) - skip &= (!x->block[i].eob); + skip &= (!x->eobs[i]); return skip; } diff --git a/vp8/encoder/x86/quantize_sse2.asm b/vp8/encoder/x86/quantize_sse2.asm index c483933df1497d779124ad2fbf6e913e016b29fc..7c249ff88b0e61a38dd3a08896c02e69eafb65eb 100644 --- a/vp8/encoder/x86/quantize_sse2.asm +++ b/vp8/encoder/x86/quantize_sse2.asm @@ -194,6 +194,8 @@ ZIGZAG_LOOP 15 movdqa [rdi], xmm0 ; store dqcoeff movdqa [rdi + 16], xmm1 + mov rcx, [rsi + vp8_blockd_eob] + ; select the last value (in zig_zag order) for EOB pcmpeqw xmm2, xmm6 pcmpeqw xmm3, xmm6 @@ -214,7 +216,8 @@ ZIGZAG_LOOP 15 pmaxsw xmm2, xmm3 movd eax, xmm2 and eax, 0xff - mov [rsi + vp8_blockd_eob], eax + + mov BYTE PTR [rcx], al ; store eob ; begin epilog add rsp, stack_size @@ -337,6 +340,8 @@ sym(vp8_fast_quantize_b_sse2): pmaxsw xmm1, xmm5 + mov rcx, [rsi + vp8_blockd_eob] + ; now down to 8 pshufd xmm5, xmm1, 00001110b @@ -354,7 +359,8 @@ sym(vp8_fast_quantize_b_sse2): movd eax, xmm1 and eax, 0xff - mov [rsi + vp8_blockd_eob], eax + + mov BYTE PTR [rcx], al ; store eob ; begin epilog %if ABI_IS_32BIT diff --git a/vp8/encoder/x86/quantize_sse4.asm b/vp8/encoder/x86/quantize_sse4.asm index 95e1c20744f7ede82625e696a34b1927c628dcb2..70eac0c0fed234d36e02f3c7bca213b1ff534f47 100644 --- a/vp8/encoder/x86/quantize_sse4.asm +++ b/vp8/encoder/x86/quantize_sse4.asm @@ -208,6 +208,8 @@ ZIGZAG_LOOP 15, 7, xmm3, xmm7, xmm8 movdqa [rdi], xmm0 movdqa [rdi + 16], xmm1 + mov rcx, [rsi + vp8_blockd_eob] + ; select the last value (in zig_zag order) for EOB pxor xmm6, xmm6 pcmpeqw xmm4, xmm6 @@ -225,7 +227,7 @@ ZIGZAG_LOOP 15, 7, xmm3, xmm7, xmm8 add eax, 1 and eax, edi - mov [rsi + vp8_blockd_eob], eax + mov BYTE PTR [rcx], al ; store eob ; begin epilog %if ABI_IS_32BIT diff --git a/vp8/encoder/x86/quantize_ssse3.asm b/vp8/encoder/x86/quantize_ssse3.asm index 912007e02d7682697ce33af594954abb4e3d645e..34cc9c3bb4f5e018cea33e96d7938636b8f9655b 100644 --- a/vp8/encoder/x86/quantize_ssse3.asm +++ b/vp8/encoder/x86/quantize_ssse3.asm @@ -110,12 +110,14 @@ sym(vp8_fast_quantize_b_ssse3): movdqa [rcx], xmm2 ;store dqcoeff movdqa [rcx + 16], xmm3 ;store dqcoeff + mov rcx, [rsi + vp8_blockd_eob] + sub edi, edx ;check for all zeros in bit mask sar edi, 31 ;0 or -1 add eax, 1 and eax, edi ;if the bit mask was all zero, ;then eob = 0 - mov [rsi + vp8_blockd_eob], eax + mov BYTE PTR [rcx], al ;store eob ; begin epilog %if ABI_IS_32BIT diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c index 191d61c60c8e11e513cce54acc5fb8b1cb8ac94a..7f5208461809be295b3bd46f8ba2f3f190a7a60e 100644 --- a/vp8/encoder/x86/x86_csystemdependent.c +++ b/vp8/encoder/x86/x86_csystemdependent.c @@ -37,17 +37,17 @@ void vp8_fast_quantize_b_mmx(BLOCK *b, BLOCKD *d) short *dqcoeff_ptr = d->dqcoeff; short *dequant_ptr = d->dequant; - d->eob = vp8_fast_quantize_b_impl_mmx( - coeff_ptr, - zbin_ptr, - qcoeff_ptr, - dequant_ptr, - scan_mask, - - round_ptr, - quant_ptr, - dqcoeff_ptr - ); + *d->eob = (char)vp8_fast_quantize_b_impl_mmx( + coeff_ptr, + zbin_ptr, + qcoeff_ptr, + dequant_ptr, + scan_mask, + + round_ptr, + quant_ptr, + dqcoeff_ptr + ); } int vp8_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc);