diff --git a/build/make/configure.sh b/build/make/configure.sh index 23cf44302e5caaa7200938ed13469d2c91851a7b..eead3b5638f80d8147f4609839f7d9c94bfff544 100755 --- a/build/make/configure.sh +++ b/build/make/configure.sh @@ -629,7 +629,7 @@ process_common_toolchain() { case ${toolchain} in sparc-solaris-*) add_extralibs -lposix4 - add_cflags "-DMUST_BE_ALIGNED" + disable fast_unaligned ;; *-solaris-*) add_extralibs -lposix4 diff --git a/vp8/common/reconinter.c b/vp8/common/reconinter.c index 3b0405ca1c640319773fe27e607e4f53b22cb6e3..84cda1334eb240d5bcd74c5be75975828f6a4c30 100644 --- a/vp8/common/reconinter.c +++ b/vp8/common/reconinter.c @@ -10,6 +10,7 @@ #include "vpx_ports/config.h" +#include "vpx/vpx_integer.h" #include "recon.h" #include "subpixel.h" #include "blockd.h" @@ -18,12 +19,6 @@ #include "onyxc_int.h" #endif -/* use this define on systems where unaligned int reads and writes are - * not allowed, i.e. ARM architectures - */ -/*#define MUST_BE_ALIGNED*/ - - static const int bbb[4] = {0, 2, 8, 10}; @@ -39,7 +34,7 @@ void vp8_copy_mem16x16_c( for (r = 0; r < 16; r++) { -#ifdef MUST_BE_ALIGNED +#if !(CONFIG_FAST_UNALIGNED) dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; @@ -58,10 +53,10 @@ void vp8_copy_mem16x16_c( dst[15] = src[15]; #else - ((int *)dst)[0] = ((int *)src)[0] ; - ((int *)dst)[1] = ((int *)src)[1] ; - ((int *)dst)[2] = ((int *)src)[2] ; - ((int *)dst)[3] = ((int *)src)[3] ; + ((uint32_t *)dst)[0] = ((uint32_t *)src)[0] ; + ((uint32_t *)dst)[1] = ((uint32_t *)src)[1] ; + ((uint32_t *)dst)[2] = ((uint32_t *)src)[2] ; + ((uint32_t *)dst)[3] = ((uint32_t *)src)[3] ; #endif src += src_stride; @@ -81,7 +76,7 @@ void vp8_copy_mem8x8_c( for (r = 0; r < 8; r++) { -#ifdef MUST_BE_ALIGNED +#if !(CONFIG_FAST_UNALIGNED) dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; @@ -91,8 +86,8 @@ void vp8_copy_mem8x8_c( dst[6] = src[6]; dst[7] = src[7]; #else - ((int *)dst)[0] = ((int *)src)[0] ; - ((int *)dst)[1] = ((int *)src)[1] ; + ((uint32_t *)dst)[0] = ((uint32_t *)src)[0] ; + ((uint32_t *)dst)[1] = ((uint32_t *)src)[1] ; #endif src += src_stride; dst += dst_stride; @@ -111,7 +106,7 @@ void vp8_copy_mem8x4_c( for (r = 0; r < 4; r++) { -#ifdef MUST_BE_ALIGNED +#if !(CONFIG_FAST_UNALIGNED) dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; @@ -121,8 +116,8 @@ void vp8_copy_mem8x4_c( dst[6] = src[6]; dst[7] = src[7]; #else - ((int *)dst)[0] = ((int *)src)[0] ; - ((int *)dst)[1] = ((int *)src)[1] ; + ((uint32_t *)dst)[0] = ((uint32_t *)src)[0] ; + ((uint32_t *)dst)[1] = ((uint32_t *)src)[1] ; #endif src += src_stride; dst += dst_stride; @@ -154,13 +149,13 @@ void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, vp8_subpix_fn_t sppf) for (r = 0; r < 4; r++) { -#ifdef MUST_BE_ALIGNED +#if !(CONFIG_FAST_UNALIGNED) pred_ptr[0] = ptr[0]; pred_ptr[1] = ptr[1]; pred_ptr[2] = ptr[2]; pred_ptr[3] = ptr[3]; #else - *(int *)pred_ptr = *(int *)ptr ; + *(uint32_t *)pred_ptr = *(uint32_t *)ptr ; #endif pred_ptr += pitch; ptr += d->pre_stride; diff --git a/vp8/decoder/asm_dec_offsets.c b/vp8/decoder/asm_dec_offsets.c index dd2aad2c371c6be7dba3b603bce97c4c5a5d0c1f..1a6090b2bfdc51c9346cc65a9a55938482942943 100644 --- a/vp8/decoder/asm_dec_offsets.c +++ b/vp8/decoder/asm_dec_offsets.c @@ -17,7 +17,6 @@ BEGIN DEFINE(detok_scan, offsetof(DETOK, scan)); DEFINE(detok_ptr_block2leftabove, offsetof(DETOK, ptr_block2leftabove)); DEFINE(detok_coef_tree_ptr, offsetof(DETOK, vp8_coef_tree_ptr)); -DEFINE(detok_teb_base_ptr, offsetof(DETOK, teb_base_ptr)); DEFINE(detok_norm_ptr, offsetof(DETOK, norm_ptr)); DEFINE(detok_ptr_coef_bands_x, offsetof(DETOK, ptr_coef_bands_x)); @@ -35,9 +34,6 @@ DEFINE(bool_decoder_value, offsetof(BOOL_DECODER, value)); DEFINE(bool_decoder_count, offsetof(BOOL_DECODER, count)); DEFINE(bool_decoder_range, offsetof(BOOL_DECODER, range)); -DEFINE(tokenextrabits_min_val, offsetof(TOKENEXTRABITS, min_val)); -DEFINE(tokenextrabits_length, offsetof(TOKENEXTRABITS, Length)); - END /* add asserts for any offset that is not supported by assembly code */ diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c index 6121490eba6718636036cb40fb2124344ee826d1..025f9dfe4c40280b178d81c5f01a3413c7ad8557 100644 --- a/vp8/decoder/detokenize.c +++ b/vp8/decoder/detokenize.c @@ -50,54 +50,45 @@ DECLARE_ALIGNED(64, static const unsigned char, coef_bands_x_8x8[64]) = { #define CAT_THREE_CONTEXT_NODE 9 #define CAT_FIVE_CONTEXT_NODE 10 -/* -//the definition is put in "onyxd_int.h" -typedef struct -{ - INT16 min_val; - INT16 Length; - UINT8 Probs[14]; -} TOKENEXTRABITS; -*/ +#define CAT1_MIN_VAL 5 +#define CAT2_MIN_VAL 7 +#define CAT3_MIN_VAL 11 +#define CAT4_MIN_VAL 19 +#define CAT5_MIN_VAL 35 +#define CAT6_MIN_VAL 67 +#define CAT1_PROB0 159 +#define CAT2_PROB0 145 +#define CAT2_PROB1 165 + +#define CAT3_PROB0 140 +#define CAT3_PROB1 148 +#define CAT3_PROB2 173 + +#define CAT4_PROB0 135 +#define CAT4_PROB1 140 +#define CAT4_PROB2 155 +#define CAT4_PROB3 176 + +#define CAT5_PROB0 130 +#define CAT5_PROB1 134 +#define CAT5_PROB2 141 +#define CAT5_PROB3 157 +#define CAT5_PROB4 180 + #if CONFIG_EXTEND_QRANGE -DECLARE_ALIGNED(16, static const TOKENEXTRABITS, vp8d_token_extra_bits2[MAX_ENTROPY_TOKENS]) = -{ - { 0, -1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* ZERO_TOKEN */ - { 1, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* ONE_TOKEN */ - { 2, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* TWO_TOKEN */ - { 3, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* THREE_TOKEN */ - { 4, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* FOUR_TOKEN */ - { 5, 0, { 159, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* DCT_VAL_CATEGORY1 */ - { 7, 1, { 145, 165, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* DCT_VAL_CATEGORY2 */ - { 11, 2, { 140, 148, 173, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* DCT_VAL_CATEGORY3 */ - { 19, 3, { 135, 140, 155, 176, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* DCT_VAL_CATEGORY4 */ - { 35, 4, { 130, 134, 141, 157, 180, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* DCT_VAL_CATEGORY5 */ - { 67, 12, { 129, 130, 133, 140, 153, 177, 196, 230, 243, 249, 252, 254, 254, 0 } }, /* DCT_VAL_CATEGORY6 */ - { 0, -1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* EOB TOKEN */ -}; +static const unsigned char cat6_prob[14] = +{ 129, 130, 133, 140, 153, 177, 196, 230, 243, 249, 252, 254, 254, 0 }; #else -DECLARE_ALIGNED(16, static const TOKENEXTRABITS, vp8d_token_extra_bits2[MAX_ENTROPY_TOKENS]) = -{ - { 0, -1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* ZERO_TOKEN */ - { 1, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* ONE_TOKEN */ - { 2, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* TWO_TOKEN */ - { 3, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* THREE_TOKEN */ - { 4, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* FOUR_TOKEN */ - { 5, 0, { 159, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* DCT_VAL_CATEGORY1 */ - { 7, 1, { 145, 165, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* DCT_VAL_CATEGORY2 */ - { 11, 2, { 140, 148, 173, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* DCT_VAL_CATEGORY3 */ - { 19, 3, { 135, 140, 155, 176, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* DCT_VAL_CATEGORY4 */ - { 35, 4, { 130, 134, 141, 157, 180, 0, 0, 0, 0, 0, 0, 0 } }, /* DCT_VAL_CATEGORY5 */ - { 67, 10, { 129, 130, 133, 140, 153, 177, 196, 230, 243, 254, 254, 0 } }, /* DCT_VAL_CATEGORY6 */ - { 0, -1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* EOB TOKEN */ -}; +static const unsigned char cat6_prob[12] = +{ 129, 130, 133, 140, 153, 177, 196, 230, 243, 254, 254, 0 }; #endif void vp8_reset_mb_tokens_context(MACROBLOCKD *x) { /* Clear entropy contexts for Y2 blocks */ - if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV) + if (x->mode_info_context->mbmi.mode != B_PRED && + x->mode_info_context->mbmi.mode != SPLITMV) { vpx_memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); vpx_memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); @@ -227,7 +218,7 @@ DECLARE_ALIGNED(16, extern const unsigned char, vp8_norm[256]); qcoeff_ptr [ scan[c] ] = (INT16) v; \ ++c; \ goto DO_WHILE; }\ - qcoeff_ptr [ scan[15] ] = (INT16) v; \ + qcoeff_ptr [ 15 ] = (INT16) v; \ goto BLOCK_FINISHED; #if CONFIG_T8X8 @@ -251,8 +242,8 @@ DECLARE_ALIGNED(16, extern const unsigned char, vp8_norm[256]); goto BLOCK_FINISHED_8x8; #endif -#define DECODE_EXTRABIT_AND_ADJUST_VAL(t,bits_count)\ - split = 1 + (((range-1) * vp8d_token_extra_bits2[t].Probs[bits_count]) >> 8); \ +#define DECODE_EXTRABIT_AND_ADJUST_VAL(prob, bits_count)\ + split = 1 + (((range-1) * prob) >> 8); \ bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \ FILL \ if(value >= bigsplit)\ @@ -658,7 +649,8 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x) scan = vp8_default_zig_zag1d; qcoeff_ptr = &x->qcoeff[0]; - if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV) + if (x->mode_info_context->mbmi.mode != B_PRED && + x->mode_info_context->mbmi.mode != SPLITMV) { i = 24; stop = 24; @@ -694,16 +686,21 @@ DO_WHILE: CHECK_0_: DECODE_AND_LOOP_IF_ZERO(Prob[ZERO_CONTEXT_NODE], CHECK_0_); DECODE_AND_BRANCH_IF_ZERO(Prob[ONE_CONTEXT_NODE], ONE_CONTEXT_NODE_0_); - DECODE_AND_BRANCH_IF_ZERO(Prob[LOW_VAL_CONTEXT_NODE], LOW_VAL_CONTEXT_NODE_0_); - DECODE_AND_BRANCH_IF_ZERO(Prob[HIGH_LOW_CONTEXT_NODE], HIGH_LOW_CONTEXT_NODE_0_); - DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREEFOUR_CONTEXT_NODE], CAT_THREEFOUR_CONTEXT_NODE_0_); - DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_FIVE_CONTEXT_NODE], CAT_FIVE_CONTEXT_NODE_0_); - val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY6].min_val; - bits_count = vp8d_token_extra_bits2[DCT_VAL_CATEGORY6].Length; + DECODE_AND_BRANCH_IF_ZERO(Prob[LOW_VAL_CONTEXT_NODE], + LOW_VAL_CONTEXT_NODE_0_); + DECODE_AND_BRANCH_IF_ZERO(Prob[HIGH_LOW_CONTEXT_NODE], + HIGH_LOW_CONTEXT_NODE_0_); + DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREEFOUR_CONTEXT_NODE], + CAT_THREEFOUR_CONTEXT_NODE_0_); + DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_FIVE_CONTEXT_NODE], + CAT_FIVE_CONTEXT_NODE_0_); + + val = CAT6_MIN_VAL; + bits_count = CONFIG_EXTEND_QRANGE?14:12; do { - DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY6, bits_count); + DECODE_EXTRABIT_AND_ADJUST_VAL(cat6_prob[bits_count], bits_count); bits_count -- ; } while (bits_count >= 0); @@ -711,41 +708,43 @@ CHECK_0_: DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val); CAT_FIVE_CONTEXT_NODE_0_: - val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY5].min_val; - DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 4); - DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 3); - DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 2); - DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 1); - DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 0); + val = CAT5_MIN_VAL; + DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB4, 4); + DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB3, 3); + DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB2, 2); + DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB1, 1); + DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB0, 0); DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val); CAT_THREEFOUR_CONTEXT_NODE_0_: - DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREE_CONTEXT_NODE], CAT_THREE_CONTEXT_NODE_0_); - val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY4].min_val; - DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 3); - DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 2); - DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 1); - DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 0); + DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREE_CONTEXT_NODE], + CAT_THREE_CONTEXT_NODE_0_); + val = CAT4_MIN_VAL; + DECODE_EXTRABIT_AND_ADJUST_VAL(CAT4_PROB3, 3); + DECODE_EXTRABIT_AND_ADJUST_VAL(CAT4_PROB2, 2); + DECODE_EXTRABIT_AND_ADJUST_VAL(CAT4_PROB1, 1); + DECODE_EXTRABIT_AND_ADJUST_VAL(CAT4_PROB0, 0); DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val); CAT_THREE_CONTEXT_NODE_0_: - val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY3].min_val; - DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 2); - DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 1); - DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 0); + val = CAT3_MIN_VAL; + DECODE_EXTRABIT_AND_ADJUST_VAL(CAT3_PROB2, 2); + DECODE_EXTRABIT_AND_ADJUST_VAL(CAT3_PROB1, 1); + DECODE_EXTRABIT_AND_ADJUST_VAL(CAT3_PROB0, 0); DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val); HIGH_LOW_CONTEXT_NODE_0_: - DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_ONE_CONTEXT_NODE], CAT_ONE_CONTEXT_NODE_0_); + DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_ONE_CONTEXT_NODE], + CAT_ONE_CONTEXT_NODE_0_); - val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY2].min_val; - DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY2, 1); - DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY2, 0); + val = CAT2_MIN_VAL; + DECODE_EXTRABIT_AND_ADJUST_VAL(CAT2_PROB1, 1); + DECODE_EXTRABIT_AND_ADJUST_VAL(CAT2_PROB0, 0); DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val); CAT_ONE_CONTEXT_NODE_0_: - val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY1].min_val; - DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY1, 0); + val = CAT1_MIN_VAL; + DECODE_EXTRABIT_AND_ADJUST_VAL(CAT1_PROB0, 0); DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val); LOW_VAL_CONTEXT_NODE_0_: @@ -770,7 +769,7 @@ ONE_CONTEXT_NODE_0_: goto DO_WHILE; } - qcoeff_ptr [ scan[15] ] = (INT16) v; + qcoeff_ptr [ 15 ] = (INT16) v; BLOCK_FINISHED: *a = *l = ((eobs[i] = c) != !type); /* any nonzero data? */ eobtotal += c; diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h index 6309915ab7ffa1b618ab05a1d2e1144294d93e6e..bb05ea826763301cd057f85adfdcbbbbe1e65032 100644 --- a/vp8/decoder/onyxd_int.h +++ b/vp8/decoder/onyxd_int.h @@ -44,21 +44,6 @@ typedef struct int size; } DATARATE; -#if CONFIG_EXTEND_QRANGE -typedef struct -{ - INT16 min_val; - INT16 Length; - UINT8 Probs[14]; -} TOKENEXTRABITS; -#else -typedef struct -{ - INT16 min_val; - INT16 Length; - UINT8 Probs[12]; -} TOKENEXTRABITS; -#endif typedef struct { int const *scan; @@ -67,7 +52,6 @@ typedef struct #endif UINT8 const *ptr_block2leftabove; vp8_tree_index const *vp8_coef_tree_ptr; - TOKENEXTRABITS const *teb_base_ptr; unsigned char *norm_ptr; UINT8 *ptr_coef_bands_x; #if CONFIG_T8X8 diff --git a/vp8/encoder/x86/sad_sse3.asm b/vp8/encoder/x86/sad_sse3.asm index 1c41c322a9d5e76f2271b743b07df4dcf29207f9..9e05521664edb8a147acf033b8cd010b92bf3ed3 100644 --- a/vp8/encoder/x86/sad_sse3.asm +++ b/vp8/encoder/x86/sad_sse3.asm @@ -43,7 +43,7 @@ %define ret_var r11 %define result_ptr [rsp+xmm_stack_space+8+4*8] %define max_err [rsp+xmm_stack_space+8+4*8] - %define height [rsp+xmm_stack_space+8+4*8] + %define height dword ptr [rsp+xmm_stack_space+8+4*8] %else %define src_ptr rdi %define src_stride rsi