Commit b5422fab authored by Alex Converse
Browse files

Add an x86inc MMX fwht4x4.

Change-Id: Ib0a73d4863478f9b8a00976379d25d2f6ebbb197
Showing with 79 additions and 1 deletion
...@@ -353,6 +353,13 @@ INSTANTIATE_TEST_CASE_P( ...@@ -353,6 +353,13 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 3))); make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 3)));
#endif #endif
// Instantiate the Trans4x4WHT test suite under the name "MMX", pairing the
// MMX forward transform vp9_fwht4x4_mmx with the C inverse
// vp9_iwht4x4_16_add_c. Only compiled when both CONFIG_USE_X86INC and
// HAVE_MMX are set.
// NOTE(review): the trailing 0 is presumably the transform-type slot of the
// fixture's parameter tuple -- confirm against the Trans4x4WHT definition.
#if CONFIG_USE_X86INC && HAVE_MMX
INSTANTIATE_TEST_CASE_P(
MMX, Trans4x4WHT,
::testing::Values(
make_tuple(&vp9_fwht4x4_mmx, &vp9_iwht4x4_16_add_c, 0)));
#endif
#if HAVE_SSE2 #if HAVE_SSE2
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
SSE2, Trans4x4DCT, SSE2, Trans4x4DCT,
......
...@@ -693,7 +693,7 @@ add_proto qw/void vp9_fht16x16/, "const int16_t *input, int16_t *output, int str ...@@ -693,7 +693,7 @@ add_proto qw/void vp9_fht16x16/, "const int16_t *input, int16_t *output, int str
specialize qw/vp9_fht16x16 sse2 avx2/; specialize qw/vp9_fht16x16 sse2 avx2/;
add_proto qw/void vp9_fwht4x4/, "const int16_t *input, int16_t *output, int stride"; add_proto qw/void vp9_fwht4x4/, "const int16_t *input, int16_t *output, int stride";
specialize qw/vp9_fwht4x4/; specialize qw/vp9_fwht4x4/, "$mmx_x86inc";
add_proto qw/void vp9_fdct4x4/, "const int16_t *input, int16_t *output, int stride"; add_proto qw/void vp9_fdct4x4/, "const int16_t *input, int16_t *output, int stride";
specialize qw/vp9_fdct4x4 sse2 avx2/; specialize qw/vp9_fdct4x4 sse2 avx2/;
......
;
; Copyright (c) 2014 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
%include "third_party/x86inc/x86inc.asm"
SECTION .text
; TRANSFORM_COLS: one 1-D pass of the 4-point transform, applied lane-wise
; to packed 16-bit words (every word lane is processed independently).
;   In:   m0 = a, m1 = b, m2 = c, m3 = d
;   Out:  m0 = a1, m1 = c1, m2 = d1, m3 = b1, where
;           e  = ((a + b) - (d - c)) >> 1     (arithmetic shift)
;           b1 = e - b
;           c1 = e - c
;           a1 = (a + b) - c1
;           d1 = (d - c) + b1
;   Scratch: m4, m5 (contents on exit are leftovers, not results)
; The trailing SWAPs only rename registers at assembly time so that the
; results end up in m0..m3 in the order listed above; they emit no code.
%macro TRANSFORM_COLS 0
paddw m0, m1 ; m0 = a + b
movq m4, m0 ; m4 = a + b (copy, m0 is still needed for a1)
psubw m3, m2 ; m3 = d - c
psubw m4, m3 ; m4 = (a + b) - (d - c)
psraw m4, 1 ; m4 = e (signed halving)
movq m5, m4 ; m5 = e
psubw m5, m1 ;b1 = e - b
psubw m4, m2 ;c1 = e - c
psubw m0, m4 ; m0 = a1 = (a + b) - c1
paddw m3, m5 ; m3 = d1 = (d - c) + b1
; m0 a0
SWAP 1, 4 ; m1 c1
SWAP 2, 3 ; m2 d1
SWAP 3, 5 ; m3 b1
%endmacro
; TRANSPOSE_4X4: transpose a 4x4 matrix of 16-bit words held one row per
; register.
;   In:   m0 = {a0 a1 a2 a3}, m1 = {b0 b1 b2 b3},
;         m2 = {c0 c1 c2 c3}, m3 = {d0 d1 d2 d3}   (lowest word first)
;   Out:  m0 = {a0 b0 c0 d0}, m1 = {a1 b1 c1 d1},
;         m2 = {a2 b2 c2 d2}, m3 = {a3 b3 c3 d3}
;   Scratch: m4, m5
; Two interleave stages (words, then dwords) build the four columns; the
; final SWAP chain renames registers so the columns are numbered m0..m3.
; NOTE(review): the output numbering assumes x86inc's chained SWAP applies
; pairwise swaps left to right (2<->3, 3<->0, 0<->1, 1<->4) -- confirm
; against the bundled x86inc.asm.
%macro TRANSPOSE_4X4 0
movq m4, m0 ; m4 = row a (copy)
movq m5, m2 ; m5 = row c (copy)
punpcklwd m4, m1 ; m4 = {a0 b0 a1 b1}
punpckhwd m0, m1 ; m0 = {a2 b2 a3 b3}
punpcklwd m5, m3 ; m5 = {c0 d0 c1 d1}
punpckhwd m2, m3 ; m2 = {c2 d2 c3 d3}
movq m1, m4 ; m1 = {a0 b0 a1 b1} (copy)
movq m3, m0 ; m3 = {a2 b2 a3 b3} (copy)
punpckldq m1, m5 ; m1 = {a0 b0 c0 d0} -> column 0
punpckhdq m4, m5 ; m4 = {a1 b1 c1 d1} -> column 1
punpckldq m3, m2 ; m3 = {a2 b2 c2 d2} -> column 2
punpckhdq m0, m2 ; m0 = {a3 b3 c3 d3} -> column 3
SWAP 2, 3, 0, 1, 4 ; rename so columns 0..3 are m0..m3
%endmacro
;-----------------------------------------------------------------------
; void vp9_fwht4x4_mmx(const int16_t *input, int16_t *output, int stride)
;
; 4x4 forward transform built from two TRANSFORM_COLS passes, each followed
; by a TRANSPOSE_4X4, with every result word finally shifted left by 2.
;
; In:    input  - 4 rows of 4 int16_t; consecutive rows are `stride`
;                 int16_t elements (stride*2 bytes) apart
;        output - receives 16 contiguous int16_t (4 rows of 8 bytes)
;        stride - row pitch of `input`, in int16_t elements
; Uses:  m0-m5, r3 (pointer to input row 2)
;-----------------------------------------------------------------------
INIT_MMX mmx
cglobal fwht4x4, 3, 4, 8, input, output, stride
  ; `stride` is a 32-bit int: on x86-64 the upper half of its register is
  ; undefined, so sign-extend before using it in 64-bit address arithmetic.
  ; (Expands to nothing on x86-32, where strideq and strided are the same.)
  movsxdifnidn    strideq,   strided
  lea             r3q,       [inputq + strideq*4]   ; r3 = &input[2*stride]
  movq            m0,        [inputq]               ; a1: row 0
  movq            m1,        [inputq + strideq*2]   ; b1: row 1
  movq            m2,        [r3q]                  ; c1: row 2
  movq            m3,        [r3q + strideq*2]      ; d1: row 3

  ; First 1-D pass, then transpose so the second pass runs along the
  ; other dimension; the second transpose restores the layout.
  TRANSFORM_COLS
  TRANSPOSE_4X4
  TRANSFORM_COLS
  TRANSPOSE_4X4

  ; Scale every output word by 4.
  psllw           m0,        2
  psllw           m1,        2
  psllw           m2,        2
  psllw           m3,        2

  movq            [outputq],      m0
  movq            [outputq + 8],  m1
  movq            [outputq + 16], m2
  movq            [outputq + 24], m3

  ; MMX registers alias the x87 stack; leave the FPU usable for the caller.
  emms
  RET
...@@ -101,6 +101,7 @@ VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm ...@@ -101,6 +101,7 @@ VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE3) += encoder/x86/vp9_sad_sse3.asm VP9_CX_SRCS-$(HAVE_SSE3) += encoder/x86/vp9_sad_sse3.asm
ifeq ($(CONFIG_USE_X86INC),yes) ifeq ($(CONFIG_USE_X86INC),yes)
VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_dct_mmx.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_error_sse2.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_error_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad_sse2.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subtract_sse2.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subtract_sse2.asm
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment