Commit 65dd056e authored by Debargha Mukherjee's avatar Debargha Mukherjee Committed by Gerrit Code Review

Merge "Optimize vpx_quantize_{b,b_32x32} assembler."

parents 35cae7f1 9cfba09a
......@@ -30,13 +30,13 @@ static void alloc_mode_context(VP10_COMMON *cm, int num_4x4_blk,
for (i = 0; i < MAX_MB_PLANE; ++i) {
for (k = 0; k < 3; ++k) {
CHECK_MEM_ERROR(cm, ctx->coeff[i][k],
vpx_memalign(16, num_pix * sizeof(*ctx->coeff[i][k])));
vpx_memalign(32, num_pix * sizeof(*ctx->coeff[i][k])));
CHECK_MEM_ERROR(cm, ctx->qcoeff[i][k],
vpx_memalign(16, num_pix * sizeof(*ctx->qcoeff[i][k])));
vpx_memalign(32, num_pix * sizeof(*ctx->qcoeff[i][k])));
CHECK_MEM_ERROR(cm, ctx->dqcoeff[i][k],
vpx_memalign(16, num_pix * sizeof(*ctx->dqcoeff[i][k])));
vpx_memalign(32, num_pix * sizeof(*ctx->dqcoeff[i][k])));
CHECK_MEM_ERROR(cm, ctx->eobs[i][k],
vpx_memalign(16, num_blk * sizeof(*ctx->eobs[i][k])));
vpx_memalign(32, num_blk * sizeof(*ctx->eobs[i][k])));
ctx->coeff_pbuf[i][k] = ctx->coeff[i][k];
ctx->qcoeff_pbuf[i][k] = ctx->qcoeff[i][k];
ctx->dqcoeff_pbuf[i][k] = ctx->dqcoeff[i][k];
......
......@@ -30,13 +30,13 @@ static void alloc_mode_context(VP9_COMMON *cm, int num_4x4_blk,
for (i = 0; i < MAX_MB_PLANE; ++i) {
for (k = 0; k < 3; ++k) {
CHECK_MEM_ERROR(cm, ctx->coeff[i][k],
vpx_memalign(16, num_pix * sizeof(*ctx->coeff[i][k])));
vpx_memalign(32, num_pix * sizeof(*ctx->coeff[i][k])));
CHECK_MEM_ERROR(cm, ctx->qcoeff[i][k],
vpx_memalign(16, num_pix * sizeof(*ctx->qcoeff[i][k])));
vpx_memalign(32, num_pix * sizeof(*ctx->qcoeff[i][k])));
CHECK_MEM_ERROR(cm, ctx->dqcoeff[i][k],
vpx_memalign(16, num_pix * sizeof(*ctx->dqcoeff[i][k])));
vpx_memalign(32, num_pix * sizeof(*ctx->dqcoeff[i][k])));
CHECK_MEM_ERROR(cm, ctx->eobs[i][k],
vpx_memalign(16, num_blk * sizeof(*ctx->eobs[i][k])));
vpx_memalign(32, num_blk * sizeof(*ctx->eobs[i][k])));
ctx->coeff_pbuf[i][k] = ctx->coeff[i][k];
ctx->qcoeff_pbuf[i][k] = ctx->qcoeff[i][k];
ctx->dqcoeff_pbuf[i][k] = ctx->dqcoeff[i][k];
......
......@@ -248,7 +248,8 @@ DSP_SRCS-$(HAVE_SSE2) += x86/highbd_quantize_intrin_sse2.c
endif
ifeq ($(ARCH_X86_64),yes)
ifeq ($(CONFIG_USE_X86INC),yes)
DSP_SRCS-$(HAVE_SSSE3) += x86/quantize_ssse3_x86_64.asm
DSP_SRCS-$(HAVE_SSSE3) += x86/quantize_ssse3_x86_64.asm
DSP_SRCS-$(HAVE_AVX) += x86/quantize_avx_x86_64.asm
endif
endif
endif # CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER
......
......@@ -913,25 +913,19 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
# Quantization
#
if ((vpx_config("CONFIG_VP9_ENCODER") eq "yes") || (vpx_config("CONFIG_VP10_ENCODER") eq "yes")) {
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vpx_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vpx_quantize_b sse2/, "$ssse3_x86_64_x86inc";
specialize qw/vpx_quantize_b sse2/, "$ssse3_x86_64_x86inc", "$avx_x86_64_x86inc";
add_proto qw/void vpx_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vpx_quantize_b_32x32/, "$ssse3_x86_64_x86inc";
add_proto qw/void vpx_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vpx_highbd_quantize_b sse2/;
specialize qw/vpx_quantize_b_32x32/, "$ssse3_x86_64_x86inc", "$avx_x86_64_x86inc";
add_proto qw/void vpx_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vpx_highbd_quantize_b_32x32 sse2/;
} else {
add_proto qw/void vpx_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vpx_quantize_b sse2/, "$ssse3_x86_64_x86inc";
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vpx_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vpx_highbd_quantize_b sse2/;
add_proto qw/void vpx_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vpx_quantize_b_32x32/, "$ssse3_x86_64_x86inc";
} # CONFIG_VP9_HIGHBITDEPTH
add_proto qw/void vpx_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vpx_highbd_quantize_b_32x32 sse2/;
} # CONFIG_VP9_HIGHBITDEPTH
} # CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER
if (vpx_config("CONFIG_ENCODERS") eq "yes") {
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment