Commit 40edab5e authored by Parag Salasakar, committed by Johann
Browse files

mips dsp-ase r2 vp9 decoder convolve module optimizations

Change-Id: I401536778e3c68ba2b3ae3955c689d005e1f1d59
Showing with 4436 additions and 8 deletions
......@@ -294,6 +294,10 @@ $(set_function_pointers c $ALL_ARCHS)
void dsputil_static_init();
dsputil_static_init();
#endif
#if CONFIG_VP9
void vp9_dsputil_static_init();
vp9_dsputil_static_init();
#endif
#endif
}
#endif
......
......@@ -642,4 +642,26 @@ INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::Values(
make_tuple(32, 64, &convolve8_neon),
make_tuple(64, 64, &convolve8_neon)));
#endif
#if HAVE_DSPR2
// Bundle of the MIPS DSPR2 convolve entry points (horizontal, vertical and
// combined, each with an averaging variant) handed to ConvolveTest below.
// NOTE(review): the argument order must match the ConvolveFunctions
// constructor, which is not visible in this hunk -- confirm.
const ConvolveFunctions convolve8_dspr2(
vp9_convolve8_horiz_dspr2, vp9_convolve8_avg_horiz_dspr2,
vp9_convolve8_vert_dspr2, vp9_convolve8_avg_vert_dspr2,
vp9_convolve8_dspr2, vp9_convolve8_avg_dspr2);
// Instantiate ConvolveTest once per tuple, all using the DSPR2 function set.
// The two integers range over 4..64 (presumably block width, height --
// confirm against the test fixture's parameter type).
INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest, ::testing::Values(
make_tuple(4, 4, &convolve8_dspr2),
make_tuple(8, 4, &convolve8_dspr2),
make_tuple(4, 8, &convolve8_dspr2),
make_tuple(8, 8, &convolve8_dspr2),
make_tuple(16, 8, &convolve8_dspr2),
make_tuple(8, 16, &convolve8_dspr2),
make_tuple(16, 16, &convolve8_dspr2),
make_tuple(32, 16, &convolve8_dspr2),
make_tuple(16, 32, &convolve8_dspr2),
make_tuple(32, 32, &convolve8_dspr2),
make_tuple(64, 32, &convolve8_dspr2),
make_tuple(32, 64, &convolve8_dspr2),
make_tuple(64, 64, &convolve8_dspr2)));
#endif
} // namespace
/*
* Copyright (c) 2013 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP9_COMMON_VP9_COMMON_DSPR2_H_
#define VP9_COMMON_VP9_COMMON_DSPR2_H_
#include <assert.h>
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_common.h"
#if HAVE_DSPR2
/* Size constant that accompanies the crop table declared below.
 * NOTE(review): presumably the number of guard entries around the 0..255
 * range of that table -- confirm against the table's definition. */
#define CROP_WIDTH 512
/* Pointer to a byte lookup table; only declared here, the definition lives in
 * another translation unit that is not visible from this header. */
extern uint8_t *vp9_ff_cropTbl;
/*
 * Evaluates to
 *   dct_const_round_shift(dct_const_round_shift(input * cospi_16_64)
 *                         * cospi_16_64)
 * as described by the step comments inside the asm, computed with the DSP
 * accumulators $ac1 (first step) and $ac2 (second step). Each step preloads
 * the accumulator with DCT_CONST_ROUNDING, multiply-accumulates, and extracts
 * the result with "extp".
 *
 * Written as a GNU statement expression ({ ... }), so it requires a
 * GCC-compatible compiler. `input` is evaluated exactly once (it is copied
 * into a local int). DCT_CONST_ROUNDING and cospi_16_64 are not defined in
 * the visible part of this header and must be in scope where the macro is
 * expanded.
 *
 * NOTE(review): the asm writes $ac1 and $ac2 but declares no clobbers, and
 * "extp" takes its bit position from the DSPControl pos field, which is not
 * set here -- presumably callers configure it beforehand; confirm.
 */
#define DCT_CONST_ROUND_SHIFT_TWICE_COSPI_16_64(input) ({ \
\
int32_t tmp, out; \
int dct_cost_rounding = DCT_CONST_ROUNDING; \
int in = input; \
\
__asm__ __volatile__ ( \
/* out = dct_const_round_shift(input_dc * cospi_16_64); */ \
"mtlo %[dct_cost_rounding], $ac1 \n\t"\
"mthi $zero, $ac1 \n\t"\
"madd $ac1, %[in], %[cospi_16_64] \n\t"\
"extp %[tmp], $ac1, 31 \n\t"\
\
/* out = dct_const_round_shift(out * cospi_16_64); */ \
"mtlo %[dct_cost_rounding], $ac2 \n\t"\
"mthi $zero, $ac2 \n\t"\
"madd $ac2, %[tmp], %[cospi_16_64] \n\t"\
"extp %[out], $ac2, 31 \n\t"\
\
: [tmp] "=&r" (tmp), [out] "=r" (out) \
: [in] "r" (in), \
[dct_cost_rounding] "r" (dct_cost_rounding), \
[cospi_16_64] "r" (cospi_16_64) \
); \
out; })
/* Prefetch data for load: issues a MIPS "pref" instruction with hint 0 on the
 * address held in src (offset 0). The asm has no outputs; the function returns
 * nothing and does not read through src in C. */
static INLINE void vp9_prefetch_load(const unsigned char *src) {
__asm__ __volatile__ (
"pref 0, 0(%[src]) \n\t"
:
: [src] "r" (src)
);
}
/* Prefetch data for store: issues a MIPS "pref" instruction with hint 1 on the
 * address held in dst (offset 0). The asm has no outputs; the function returns
 * nothing and does not write through dst in C. */
static INLINE void vp9_prefetch_store(unsigned char *dst) {
__asm__ __volatile__ (
"pref 1, 0(%[dst]) \n\t"
:
: [dst] "r" (dst)
);
}
/* Prefetch data for a streamed load: same shape as vp9_prefetch_load but uses
 * "pref" hint 4 on the address held in src (offset 0). The asm has no outputs
 * and the function returns nothing. */
static INLINE void vp9_prefetch_load_streamed(const unsigned char *src) {
__asm__ __volatile__ (
"pref 4, 0(%[src]) \n\t"
:
: [src] "r" (src)
);
}
/* Prefetch data for a streamed store: same shape as vp9_prefetch_store but
 * uses "pref" hint 5 on the address held in dst (offset 0). The asm has no
 * outputs and the function returns nothing. */
static INLINE void vp9_prefetch_store_streamed(unsigned char *dst) {
__asm__ __volatile__ (
"pref 5, 0(%[dst]) \n\t"
:
: [dst] "r" (dst)
);
}
#endif // #if HAVE_DSPR2
#endif // VP9_COMMON_VP9_COMMON_DSPR2_H_
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -257,28 +257,28 @@ specialize vp9_blend_b
# Sub Pixel Filters
#
prototype void vp9_convolve_copy "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
specialize vp9_convolve_copy $sse2_x86inc neon
specialize vp9_convolve_copy $sse2_x86inc neon dspr2
prototype void vp9_convolve_avg "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
specialize vp9_convolve_avg $sse2_x86inc neon
specialize vp9_convolve_avg $sse2_x86inc neon dspr2
prototype void vp9_convolve8 "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
specialize vp9_convolve8 ssse3 neon
specialize vp9_convolve8 ssse3 neon dspr2
prototype void vp9_convolve8_horiz "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
specialize vp9_convolve8_horiz ssse3 neon
specialize vp9_convolve8_horiz ssse3 neon dspr2
prototype void vp9_convolve8_vert "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
specialize vp9_convolve8_vert ssse3 neon
specialize vp9_convolve8_vert ssse3 neon dspr2
prototype void vp9_convolve8_avg "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
specialize vp9_convolve8_avg ssse3 neon
specialize vp9_convolve8_avg ssse3 neon dspr2
prototype void vp9_convolve8_avg_horiz "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
specialize vp9_convolve8_avg_horiz ssse3 neon
specialize vp9_convolve8_avg_horiz ssse3 neon dspr2
prototype void vp9_convolve8_avg_vert "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
specialize vp9_convolve8_avg_vert ssse3 neon
specialize vp9_convolve8_avg_vert ssse3 neon dspr2
#
# dct
......
......@@ -87,6 +87,14 @@ VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_intrapred_sse2.asm
VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_intrapred_ssse3.asm
endif
# common (c)
VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_common_dspr2.h
VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_convolve8_avg_dspr2.c
VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_convolve8_avg_horiz_dspr2.c
VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_convolve8_dspr2.c
VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_convolve8_horiz_dspr2.c
VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_convolve8_vert_dspr2.c
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve_neon.c
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment