Commit 5b098b18 authored by James Yu's avatar James Yu Committed by Johann

VP9 common for ARMv8 by using NEON intrinsics 01

Add vp9_loopfilter_neon.c
- vp9_lpf_horizontal_4_neon
- vp9_lpf_vertical_4_neon
- vp9_lpf_horizontal_8_neon
- vp9_lpf_vertical_8_neon

Change-Id: I97a0d7b399a431c21ee77396be3d5f5a1f7ebccb
Signed-off-by: 's avatarJames Yu <james.yu@linaro.org>
parent 547cb14e
......@@ -594,4 +594,35 @@ INSTANTIATE_TEST_CASE_P(
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif
#if HAVE_NEON && (!CONFIG_VP9_HIGHBITDEPTH)
INSTANTIATE_TEST_CASE_P(
NEON, Loop8Test6Param,
::testing::Values(
#if HAVE_NEON_ASM
make_tuple(&vp9_lpf_horizontal_16_neon,
&vp9_lpf_horizontal_16_c, 8),
#endif // HAVE_NEON_ASM
make_tuple(&vp9_lpf_horizontal_4_neon,
&vp9_lpf_horizontal_4_c, 8),
make_tuple(&vp9_lpf_horizontal_8_neon,
&vp9_lpf_horizontal_8_c, 8),
make_tuple(&vp9_lpf_vertical_4_neon,
&vp9_lpf_vertical_4_c, 8),
make_tuple(&vp9_lpf_vertical_8_neon,
&vp9_lpf_vertical_8_c, 8)));
INSTANTIATE_TEST_CASE_P(
NEON, Loop8Test9Param,
::testing::Values(
#if HAVE_NEON_ASM
make_tuple(&vp9_lpf_horizontal_4_dual_neon,
&vp9_lpf_horizontal_4_dual_c, 8),
#endif // HAVE_NEON_ASM
make_tuple(&vp9_lpf_horizontal_8_dual_neon,
&vp9_lpf_horizontal_8_dual_c, 8),
make_tuple(&vp9_lpf_vertical_4_dual_neon,
&vp9_lpf_vertical_4_dual_c, 8),
make_tuple(&vp9_lpf_vertical_8_dual_neon,
&vp9_lpf_vertical_8_dual_c, 8)));
#endif // HAVE_NEON && (!CONFIG_VP9_HIGHBITDEPTH)
} // namespace
......@@ -18,8 +18,8 @@ void vp9_lpf_horizontal_8_dual_neon(uint8_t *s, int p /* pitch */,
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
vp9_lpf_horizontal_8(s, p, blimit0, limit0, thresh0, 1);
vp9_lpf_horizontal_8(s + 8, p, blimit1, limit1, thresh1, 1);
vp9_lpf_horizontal_8_neon(s, p, blimit0, limit0, thresh0, 1);
vp9_lpf_horizontal_8_neon(s + 8, p, blimit1, limit1, thresh1, 1);
}
void vp9_lpf_vertical_4_dual_neon(uint8_t *s, int p,
......@@ -44,6 +44,7 @@ void vp9_lpf_vertical_8_dual_neon(uint8_t *s, int p,
vp9_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1);
}
#if HAVE_NEON_ASM
void vp9_lpf_vertical_16_dual_neon(uint8_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
......@@ -51,3 +52,4 @@ void vp9_lpf_vertical_16_dual_neon(uint8_t *s, int p,
vp9_lpf_vertical_16_neon(s, p, blimit, limit, thresh);
vp9_lpf_vertical_16_neon(s + 8 * p, p, blimit, limit, thresh);
}
#endif // HAVE_NEON_ASM
This diff is collapsed.
......@@ -236,36 +236,29 @@ specialize qw/vp9_lpf_vertical_16_dual sse2 neon_asm dspr2/;
$vp9_lpf_vertical_16_dual_neon_asm=vp9_lpf_vertical_16_dual_neon;
add_proto qw/void vp9_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
specialize qw/vp9_lpf_vertical_8 sse2 neon_asm dspr2/;
$vp9_lpf_vertical_8_neon_asm=vp9_lpf_vertical_8_neon;
specialize qw/vp9_lpf_vertical_8 sse2 neon dspr2/;
add_proto qw/void vp9_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/vp9_lpf_vertical_8_dual sse2 neon_asm dspr2/;
$vp9_lpf_vertical_8_dual_neon_asm=vp9_lpf_vertical_8_dual_neon;
specialize qw/vp9_lpf_vertical_8_dual sse2 neon dspr2/;
add_proto qw/void vp9_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
specialize qw/vp9_lpf_vertical_4 mmx neon_asm dspr2/;
$vp9_lpf_vertical_4_neon_asm=vp9_lpf_vertical_4_neon;
specialize qw/vp9_lpf_vertical_4 mmx neon dspr2/;
add_proto qw/void vp9_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/vp9_lpf_vertical_4_dual sse2 neon_asm dspr2/;
$vp9_lpf_vertical_4_dual_neon_asm=vp9_lpf_vertical_4_dual_neon;
specialize qw/vp9_lpf_vertical_4_dual sse2 neon dspr2/;
add_proto qw/void vp9_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
specialize qw/vp9_lpf_horizontal_16 sse2 avx2 neon_asm dspr2/;
$vp9_lpf_horizontal_16_neon_asm=vp9_lpf_horizontal_16_neon;
add_proto qw/void vp9_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
specialize qw/vp9_lpf_horizontal_8 sse2 neon_asm dspr2/;
$vp9_lpf_horizontal_8_neon_asm=vp9_lpf_horizontal_8_neon;
specialize qw/vp9_lpf_horizontal_8 sse2 neon dspr2/;
add_proto qw/void vp9_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/vp9_lpf_horizontal_8_dual sse2 neon_asm dspr2/;
$vp9_lpf_horizontal_8_dual_neon_asm=vp9_lpf_horizontal_8_dual_neon;
specialize qw/vp9_lpf_horizontal_8_dual sse2 neon dspr2/;
add_proto qw/void vp9_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
specialize qw/vp9_lpf_horizontal_4 mmx neon_asm dspr2/;
$vp9_lpf_horizontal_4_neon_asm=vp9_lpf_horizontal_4_neon;
specialize qw/vp9_lpf_horizontal_4 mmx neon dspr2/;
add_proto qw/void vp9_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/vp9_lpf_horizontal_4_dual sse2 neon_asm dspr2/;
......
......@@ -133,11 +133,9 @@ endif
VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_convolve_neon.c
VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_idct16x16_neon.c
VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_loopfilter_16_neon.c
VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_convolve8_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_convolve8_avg_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_loopfilter_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_loopfilter_16_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_loopfilter_16_neon_asm$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_dc_only_idct_add_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_idct4x4_1_add_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_idct4x4_add_neon$(ASM)
......@@ -155,4 +153,16 @@ VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_avg_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_save_reg_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_reconintra_neon$(ASM)
# neon with assembly and intrinsics implementations. If both are available
# prefer assembly.
ifeq ($(HAVE_NEON_ASM), yes)
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_loopfilter_neon_asm$(ASM)
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_loopfilter_16_neon.c
else
ifeq ($(HAVE_NEON), yes)
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_loopfilter_neon.c
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_loopfilter_16_neon.c
endif # HAVE_NEON
endif # HAVE_NEON_ASM
$(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.pl))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment