Commit 79687079 authored by Justin Ruggles

x86: add support for fmaddps fma4 instruction with abstraction to avx/sse

parent 0cf7d849
......@@ -242,6 +242,7 @@ Optimization options (experts only):
--disable-sse disable SSE optimizations
--disable-ssse3 disable SSSE3 optimizations
--disable-avx disable AVX optimizations
--disable-fma4 disable FMA4 optimizations
--disable-armv5te disable armv5te optimizations
--disable-armv6 disable armv6 optimizations
--disable-armv6t2 disable armv6t2 optimizations
......@@ -1047,6 +1048,7 @@ ARCH_EXT_LIST='
armv6t2
armvfp
avx
fma4
mmi
mmx
mmx2
......@@ -1295,6 +1297,7 @@ mmx2_deps="mmx"
sse_deps="mmx"
ssse3_deps="sse"
avx_deps="ssse3"
fma4_deps="avx"
aligned_stack_if_any="ppc x86"
fast_64bit_if_any="alpha ia64 mips64 parisc64 ppc64 sparc64 x86_64"
......@@ -2865,6 +2868,7 @@ EOF
check_yasm "pextrd [eax], xmm0, 1" && enable yasm ||
die "yasm not found, use --disable-yasm for a crippled build"
check_yasm "vextractf128 xmm0, ymm0, 0" || disable avx
check_yasm "vfmaddps ymm0, ymm1, ymm2, ymm3" || disable fma4
fi
case "$cpu" in
......@@ -3292,6 +3296,7 @@ if enabled x86; then
echo "SSE enabled ${sse-no}"
echo "SSSE3 enabled ${ssse3-no}"
echo "AVX enabled ${avx-no}"
echo "FMA4 enabled ${fma4-no}"
echo "CMOV enabled ${cmov-no}"
echo "CMOV is fast ${fast_cmov-no}"
echo "EBX available ${ebx_available-no}"
......
......@@ -1093,16 +1093,22 @@ AVX_INSTR pfmul, 1, 0, 1
%undef j
%macro FMA_INSTR 3
%macro %1 4-7 %1, %2, %3
%if cpuflag(xop)
v%5 %1, %2, %3, %4
%macro %1 5-8 %1, %2, %3
%if cpuflag(xop) || cpuflag(fma4)
v%6 %1, %2, %3, %4
%else
%6 %1, %2, %3
%7 %1, %4
%ifidn %1, %4
%7 %5, %2, %3
%8 %1, %4, %5
%else
%7 %1, %2, %3
%8 %1, %4
%endif
%endif
%endmacro
%endmacro
FMA_INSTR fmaddps, mulps, addps
FMA_INSTR pmacsdd, pmulld, paddd
FMA_INSTR pmacsww, pmullw, paddw
FMA_INSTR pmadcswd, pmaddwd, paddd
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment