Commit 239fdf1b authored by Diego Biurrun

x86: build: replace mmx2 by mmxext

Refactoring mmx2/mmxext YASM code with cpuflags will force renames.
So switching to a consistent naming scheme beforehand is sensible.
The name "mmxext" is more official and widespread and also the name
of the CPU flag, as reported e.g. by the Linux kernel.
parent 66adb7ce
......@@ -1378,7 +1378,7 @@ PREDEFINED = "__attribute__(x)=" \
"DEF(x)=x ## _TMPL" \
HAVE_AV_CONFIG_H \
HAVE_MMX \
HAVE_MMX2 \
HAVE_MMXEXT \
HAVE_AMD3DNOW \
"DECLARE_ALIGNED(a,t,n)=t n" \
"offsetof(x,y)=0x42"
......
......@@ -243,7 +243,7 @@ Optimization options (experts only):
--disable-amd3dnow disable 3DNow! optimizations
--disable-amd3dnowext disable 3DNow! extended optimizations
--disable-mmx disable MMX optimizations
--disable-mmx2 disable MMX2 optimizations
--disable-mmxext disable MMXEXT optimizations
--disable-sse disable SSE optimizations
--disable-ssse3 disable SSSE3 optimizations
--disable-avx disable AVX optimizations
......@@ -1054,7 +1054,7 @@ ARCH_EXT_LIST='
fma4
mmi
mmx
mmx2
mmxext
neon
ppc4xx
sse
......@@ -1302,7 +1302,7 @@ x86_64_suggest="cmov fast_cmov"
amd3dnow_deps="mmx"
amd3dnowext_deps="amd3dnow"
mmx_deps="x86"
mmx2_deps="mmx"
mmxext_deps="mmx"
sse_deps="mmx"
ssse3_deps="sse"
avx_deps="ssse3"
......@@ -2861,9 +2861,9 @@ EOF
# check whether xmm clobbers are supported
check_asm xmm_clobbers '"":::"%xmm0"'
# check whether binutils is new enough to compile SSSE3/MMX2
# check whether binutils is new enough to compile SSSE3/MMXEXT
enabled ssse3 && check_asm ssse3 '"pabsw %xmm0, %xmm0"'
enabled mmx2 && check_asm mmx2 '"pmaxub %mm0, %mm1"'
enabled mmxext && check_asm mmxext '"pmaxub %mm0, %mm1"'
if ! disabled_any asm mmx yasm; then
if check_cmd $yasmexe --version; then
......@@ -3307,7 +3307,7 @@ echo "runtime cpu detection ${runtime_cpudetect-no}"
if enabled x86; then
echo "${yasmexe} ${yasm-no}"
echo "MMX enabled ${mmx-no}"
echo "MMX2 enabled ${mmx2-no}"
echo "MMXEXT enabled ${mmxext-no}"
echo "3DNow! enabled ${amd3dnow-no}"
echo "3DNow! extended enabled ${amd3dnowext-no}"
echo "SSE enabled ${sse-no}"
......
......@@ -13,6 +13,11 @@ libavutil: 2011-04-18
API changes, most recent first:
2012-08-03 - xxxxxxx - lavu 51.37.1 - cpu.h
lsws 2.1.1 - swscale.h
Rename AV_CPU_FLAG_MMX2 ---> AV_CPU_FLAG_MMXEXT.
Rename SWS_CPU_CAPS_MMX2 ---> SWS_CPU_CAPS_MMXEXT.
2012-07-xx - xxxxxxx - lavf 54.13.0 - avformat.h
Add AVFMT_FLAG_NOBUFFER for low latency use cases.
......
......@@ -87,7 +87,7 @@ static const struct algo fdct_tab[] = {
#if HAVE_MMX && HAVE_INLINE_ASM
{ "MMX", ff_fdct_mmx, NO_PERM, AV_CPU_FLAG_MMX },
{ "MMX2", ff_fdct_mmx2, NO_PERM, AV_CPU_FLAG_MMX2 },
{ "MMXEXT", ff_fdct_mmx2, NO_PERM, AV_CPU_FLAG_MMXEXT },
{ "SSE2", ff_fdct_sse2, NO_PERM, AV_CPU_FLAG_SSE2 },
#endif
......@@ -111,7 +111,7 @@ static const struct algo idct_tab[] = {
#if HAVE_MMX && HAVE_INLINE_ASM
{ "SIMPLE-MMX", ff_simple_idct_mmx, MMX_SIMPLE_PERM, AV_CPU_FLAG_MMX },
{ "XVID-MMX", ff_idct_xvid_mmx, NO_PERM, AV_CPU_FLAG_MMX, 1 },
{ "XVID-MMX2", ff_idct_xvid_mmx2, NO_PERM, AV_CPU_FLAG_MMX2, 1 },
{ "XVID-MMXEXT", ff_idct_xvid_mmx2, NO_PERM, AV_CPU_FLAG_MMXEXT, 1 },
{ "XVID-SSE2", ff_idct_xvid_sse2, SSE2_PERM, AV_CPU_FLAG_SSE2, 1 },
#endif
......
......@@ -116,8 +116,8 @@ int main(int argc, char **argv)
AVCodecContext *ctx;
int c;
DSPContext cctx, mmxctx;
int flags[2] = { AV_CPU_FLAG_MMX, AV_CPU_FLAG_MMX2 };
int flags_size = HAVE_MMX2 ? 2 : 1;
int flags[2] = { AV_CPU_FLAG_MMX, AV_CPU_FLAG_MMXEXT };
int flags_size = HAVE_MMXEXT ? 2 : 1;
if (argc > 1) {
help();
......
......@@ -68,7 +68,7 @@ cglobal ac3_exponent_min_%1, 3,4,2, exp, reuse_blks, expn, offset
%define LOOP_ALIGN
INIT_MMX
AC3_EXPONENT_MIN mmx
%if HAVE_MMX2
%if HAVE_MMXEXT
%define PMINUB PMINUB_MMXEXT
%define LOOP_ALIGN ALIGN 16
AC3_EXPONENT_MIN mmxext
......
......@@ -65,7 +65,7 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
c->float_to_fixed24 = ff_float_to_fixed24_3dnow;
}
}
if (mm_flags & AV_CPU_FLAG_MMX2 && HAVE_MMX2) {
if (mm_flags & AV_CPU_FLAG_MMXEXT && HAVE_MMXEXT) {
c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx2;
}
......
......@@ -486,7 +486,7 @@ void ff_cavsdsp_init_mmx(CAVSDSPContext *c, AVCodecContext *avctx)
int mm_flags = av_get_cpu_flags();
#if HAVE_INLINE_ASM
if (mm_flags & AV_CPU_FLAG_MMX2) ff_cavsdsp_init_mmx2 (c, avctx);
if (mm_flags & AV_CPU_FLAG_MMXEXT) ff_cavsdsp_init_mmx2(c, avctx);
if (mm_flags & AV_CPU_FLAG_3DNOW) ff_cavsdsp_init_3dnow(c, avctx);
#endif /* HAVE_INLINE_ASM */
}
......@@ -3028,7 +3028,7 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
c->idct_add = ff_idct_xvid_sse2_add;
c->idct = ff_idct_xvid_sse2;
c->idct_permutation_type = FF_SSE2_IDCT_PERM;
} else if (mm_flags & AV_CPU_FLAG_MMX2) {
} else if (mm_flags & AV_CPU_FLAG_MMXEXT) {
c->idct_put = ff_idct_xvid_mmx2_put;
c->idct_add = ff_idct_xvid_mmx2_add;
c->idct = ff_idct_xvid_mmx2;
......@@ -3044,7 +3044,7 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
dsputil_init_mmx(c, avctx, mm_flags);
}
if (mm_flags & AV_CPU_FLAG_MMX2)
if (mm_flags & AV_CPU_FLAG_MMXEXT)
dsputil_init_mmx2(c, avctx, mm_flags);
if (mm_flags & AV_CPU_FLAG_3DNOW && HAVE_AMD3DNOW)
......
......@@ -1111,7 +1111,7 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX)) {
if(mm_flags & AV_CPU_FLAG_SSE2){
c->fdct = ff_fdct_sse2;
}else if(mm_flags & AV_CPU_FLAG_MMX2){
} else if (mm_flags & AV_CPU_FLAG_MMXEXT) {
c->fdct = ff_fdct_mmx2;
}else{
c->fdct = ff_fdct_mmx;
......@@ -1144,8 +1144,7 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx;
if (mm_flags & AV_CPU_FLAG_MMX2) {
if (mm_flags & AV_CPU_FLAG_MMXEXT) {
c->sum_abs_dctelem= sum_abs_dctelem_mmx2;
c->vsad[4]= vsad_intra16_mmx2;
......@@ -1186,7 +1185,7 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->hadamard8_diff[0] = ff_hadamard8_diff16_mmx;
c->hadamard8_diff[1] = ff_hadamard8_diff_mmx;
if (mm_flags & AV_CPU_FLAG_MMX2) {
if (mm_flags & AV_CPU_FLAG_MMXEXT) {
c->hadamard8_diff[0] = ff_hadamard8_diff16_mmx2;
c->hadamard8_diff[1] = ff_hadamard8_diff_mmx2;
}
......
......@@ -198,7 +198,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
}
}
if (mm_flags & AV_CPU_FLAG_MMX2) {
if (mm_flags & AV_CPU_FLAG_MMXEXT) {
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmx2;
h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_mmx2;
if (chroma_format_idc == 1)
......@@ -308,7 +308,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
}
}
} else if (bit_depth == 10) {
if (mm_flags & AV_CPU_FLAG_MMX2) {
if (mm_flags & AV_CPU_FLAG_MMXEXT) {
h->pred4x4[DC_PRED ] = ff_pred4x4_dc_10_mmxext;
h->pred4x4[HOR_UP_PRED ] = ff_pred4x4_horizontal_up_10_mmxext;
......
......@@ -218,7 +218,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
#if HAVE_YASM
int mm_flags = av_get_cpu_flags();
if (chroma_format_idc == 1 && mm_flags & AV_CPU_FLAG_MMX2)
if (chroma_format_idc == 1 && mm_flags & AV_CPU_FLAG_MMXEXT)
c->h264_loop_filter_strength = ff_h264_loop_filter_strength_mmx2;
if (bit_depth == 8) {
......@@ -236,7 +236,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
if (mm_flags & AV_CPU_FLAG_CMOV)
c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_mmx;
if (mm_flags & AV_CPU_FLAG_MMX2) {
if (mm_flags & AV_CPU_FLAG_MMXEXT) {
c->h264_idct_dc_add = ff_h264_idct_dc_add_8_mmx2;
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmx2;
c->h264_idct_add16 = ff_h264_idct_add16_8_mmx2;
......@@ -304,7 +304,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
}
} else if (bit_depth == 10) {
if (mm_flags & AV_CPU_FLAG_MMX) {
if (mm_flags & AV_CPU_FLAG_MMX2) {
if (mm_flags & AV_CPU_FLAG_MMXEXT) {
#if ARCH_X86_32
c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_10_mmx2;
c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_mmx2;
......
......@@ -444,7 +444,7 @@ void ff_dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx)
c->sad[0]= sad16_mmx;
c->sad[1]= sad8_mmx;
}
if (mm_flags & AV_CPU_FLAG_MMX2) {
if (mm_flags & AV_CPU_FLAG_MMXEXT) {
c->pix_abs[0][0] = sad16_mmx2;
c->pix_abs[1][0] = sad8_mmx2;
......
......@@ -595,15 +595,15 @@ static void denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){
#define HAVE_SSSE3 0
#undef HAVE_SSE2
#undef HAVE_MMX2
#undef HAVE_MMXEXT
#define HAVE_SSE2 0
#define HAVE_MMX2 0
#define HAVE_MMXEXT 0
#define RENAME(a) a ## _MMX
#define RENAMEl(a) a ## _mmx
#include "mpegvideo_mmx_template.c"
#undef HAVE_MMX2
#define HAVE_MMX2 1
#undef HAVE_MMXEXT
#define HAVE_MMXEXT 1
#undef RENAME
#undef RENAMEl
#define RENAME(a) a ## _MMX2
......@@ -660,7 +660,7 @@ void ff_MPV_common_init_mmx(MpegEncContext *s)
#endif
if(mm_flags & AV_CPU_FLAG_SSE2){
s->dct_quantize= dct_quantize_SSE2;
} else if(mm_flags & AV_CPU_FLAG_MMX2){
} else if (mm_flags & AV_CPU_FLAG_MMXEXT) {
s->dct_quantize= dct_quantize_MMX2;
} else {
s->dct_quantize= dct_quantize_MMX;
......
......@@ -48,7 +48,7 @@
#define MMREG_WIDTH "8"
#define MM "%%mm"
#define MOVQ "movq"
#if HAVE_MMX2
#if HAVE_MMXEXT
#define SPREADW(a) "pshufw $0, "a", "a" \n\t"
#define PMAXW(a,b) "pmaxsw "a", "b" \n\t"
#define PMAX(a,b) \
......
......@@ -41,7 +41,7 @@ void ff_pngdsp_init_x86(PNGDSPContext *dsp)
if (flags & AV_CPU_FLAG_MMX)
dsp->add_bytes_l2 = ff_add_bytes_l2_mmx;
#endif
if (flags & AV_CPU_FLAG_MMX2)
if (flags & AV_CPU_FLAG_MMXEXT)
dsp->add_paeth_prediction = ff_add_png_paeth_prediction_mmx2;
if (flags & AV_CPU_FLAG_SSE2)
dsp->add_bytes_l2 = ff_add_bytes_l2_sse2;
......
......@@ -37,7 +37,7 @@ av_cold void ff_rv34dsp_init_x86(RV34DSPContext* c, DSPContext *dsp)
if (mm_flags & AV_CPU_FLAG_MMX)
c->rv34_idct_dc_add = ff_rv34_idct_dc_add_mmx;
if (mm_flags & AV_CPU_FLAG_MMX2) {
if (mm_flags & AV_CPU_FLAG_MMXEXT) {
c->rv34_inv_transform_dc = ff_rv34_idct_dc_noround_mmx2;
c->rv34_idct_add = ff_rv34_idct_add_mmx2;
}
......
......@@ -200,7 +200,7 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp)
QPEL_MC_SET(put_, _mmx)
#endif
}
if (mm_flags & AV_CPU_FLAG_MMX2) {
if (mm_flags & AV_CPU_FLAG_MMXEXT) {
c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_mmx2;
c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_mmx2;
c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_mmx2;
......
......@@ -889,7 +889,7 @@ void ff_dwt_init_x86(DWTContext *c)
c->inner_add_yblock = ff_snow_inner_add_yblock_sse2;
}
else{
if(mm_flags & AV_CPU_FLAG_MMX2){
if (mm_flags & AV_CPU_FLAG_MMXEXT) {
c->horizontal_compose97i = ff_snow_horizontal_compose97i_mmx;
#if HAVE_7REGS
c->vertical_compose97i = ff_snow_vertical_compose97i_mmx;
......
......@@ -757,7 +757,7 @@ void ff_vc1dsp_init_mmx(VC1DSPContext *dsp)
dsp->put_vc1_mspel_pixels_tab[15] = put_vc1_mspel_mc33_mmx;
}
if (mm_flags & AV_CPU_FLAG_MMX2){
if (mm_flags & AV_CPU_FLAG_MMXEXT) {
dsp->avg_vc1_mspel_pixels_tab[ 0] = ff_avg_vc1_mspel_mc00_mmx2;
dsp->avg_vc1_mspel_pixels_tab[ 4] = avg_vc1_mspel_mc01_mmx2;
dsp->avg_vc1_mspel_pixels_tab[ 8] = avg_vc1_mspel_mc02_mmx2;
......@@ -798,7 +798,7 @@ void ff_vc1dsp_init_mmx(VC1DSPContext *dsp)
dsp->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_mmx_nornd;
}
if (mm_flags & AV_CPU_FLAG_MMX2) {
if (mm_flags & AV_CPU_FLAG_MMXEXT) {
ASSIGN_LF(mmx2);
dsp->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_mmx2_nornd;
} else if (mm_flags & AV_CPU_FLAG_3DNOW) {
......
......@@ -49,7 +49,7 @@ av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags)
}
#endif
if (HAVE_MMX2 && cpuflags & AV_CPU_FLAG_MMX2) {
if (HAVE_MMXEXT && cpuflags & AV_CPU_FLAG_MMXEXT) {
c->idct_dc_add = ff_vp3_idct_dc_add_mmx2;
if (!(flags & CODEC_FLAG_BITEXACT)) {
......
......@@ -350,7 +350,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
/* note that 4-tap width=16 functions are missing because w=16
* is only used for luma, and luma is always a copy or sixtap. */
if (mm_flags & AV_CPU_FLAG_MMX2) {
if (mm_flags & AV_CPU_FLAG_MMXEXT) {
VP8_MC_FUNC(2, 4, mmx2);
VP8_BILINEAR_MC_FUNC(2, 4, mmx2);
#if ARCH_X86_32
......
......@@ -28,7 +28,7 @@
DECLARE_ALIGNED(16, static const uint16_t, pw_7f)[8] = {0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F};
DECLARE_ALIGNED(16, static const uint16_t, pw_ff)[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF};
#if HAVE_MMX2
#if HAVE_MMXEXT
static void gradfun_filter_line_mmx2(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers)
{
intptr_t x;
......@@ -173,8 +173,8 @@ av_cold void ff_gradfun_init_x86(GradFunContext *gf)
int cpu_flags = av_get_cpu_flags();
#if HAVE_INLINE_ASM
#if HAVE_MMX2
if (cpu_flags & AV_CPU_FLAG_MMX2)
#if HAVE_MMXEXT
if (cpu_flags & AV_CPU_FLAG_MMXEXT)
gf->filter_line = gradfun_filter_line_mmx2;
#endif
#if HAVE_SSSE3
......
......@@ -45,7 +45,7 @@ DECLARE_ASM_CONST(16, const xmm_reg, pw_1) = {0x0001000100010001ULL, 0x000100010
#undef COMPILE_TEMPLATE_SSE
#endif
#if HAVE_MMX2
#if HAVE_MMXEXT
#undef RENAME
#define RENAME(a) a ## _mmx2
#include "yadif_template.c"
......@@ -58,8 +58,8 @@ av_cold void ff_yadif_init_x86(YADIFContext *yadif)
int cpu_flags = av_get_cpu_flags();
#if HAVE_INLINE_ASM
#if HAVE_MMX2
if (cpu_flags & AV_CPU_FLAG_MMX2)
#if HAVE_MMXEXT
if (cpu_flags & AV_CPU_FLAG_MMXEXT)
yadif->filter_line = yadif_filter_line_mmx2;
#endif
#if HAVE_SSE
......
......@@ -47,10 +47,10 @@ void av_set_cpu_flags_mask(int mask)
int av_parse_cpu_flags(const char *s)
{
#define CPUFLAG_MMX2 (AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMX2 | AV_CPU_FLAG_CMOV)
#define CPUFLAG_MMXEXT (AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT | AV_CPU_FLAG_CMOV)
#define CPUFLAG_3DNOW (AV_CPU_FLAG_3DNOW | AV_CPU_FLAG_MMX)
#define CPUFLAG_3DNOWEXT (AV_CPU_FLAG_3DNOWEXT | CPUFLAG_3DNOW)
#define CPUFLAG_SSE (AV_CPU_FLAG_SSE | CPUFLAG_MMX2)
#define CPUFLAG_SSE (AV_CPU_FLAG_SSE | CPUFLAG_MMXEXT)
#define CPUFLAG_SSE2 (AV_CPU_FLAG_SSE2 | CPUFLAG_SSE)
#define CPUFLAG_SSE2SLOW (AV_CPU_FLAG_SSE2SLOW | CPUFLAG_SSE2)
#define CPUFLAG_SSE3 (AV_CPU_FLAG_SSE3 | CPUFLAG_SSE2)
......@@ -67,7 +67,7 @@ int av_parse_cpu_flags(const char *s)
{ "altivec" , NULL, 0, AV_OPT_TYPE_CONST, { AV_CPU_FLAG_ALTIVEC }, .unit = "flags" },
#elif ARCH_X86
{ "mmx" , NULL, 0, AV_OPT_TYPE_CONST, { AV_CPU_FLAG_MMX }, .unit = "flags" },
{ "mmx2" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_MMX2 }, .unit = "flags" },
{ "mmxext" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_MMXEXT }, .unit = "flags" },
{ "sse" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE }, .unit = "flags" },
{ "sse2" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE2 }, .unit = "flags" },
{ "sse2slow", NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE2SLOW }, .unit = "flags" },
......@@ -129,7 +129,7 @@ static const struct {
{ AV_CPU_FLAG_ALTIVEC, "altivec" },
#elif ARCH_X86
{ AV_CPU_FLAG_MMX, "mmx" },
{ AV_CPU_FLAG_MMX2, "mmx2" },
{ AV_CPU_FLAG_MMXEXT, "mmxext" },
{ AV_CPU_FLAG_SSE, "sse" },
{ AV_CPU_FLAG_SSE2, "sse2" },
{ AV_CPU_FLAG_SSE2SLOW, "sse2(slow)" },
......
......@@ -21,11 +21,16 @@
#ifndef AVUTIL_CPU_H
#define AVUTIL_CPU_H
#include "version.h"
#define AV_CPU_FLAG_FORCE 0x80000000 /* force usage of selected flags (OR) */
/* lower 16 bits - CPU features */
#define AV_CPU_FLAG_MMX 0x0001 ///< standard MMX
#define AV_CPU_FLAG_MMXEXT 0x0002 ///< SSE integer functions or AMD MMX ext
#if LIBAVUTIL_VERSION_MAJOR < 52
#define AV_CPU_FLAG_MMX2 0x0002 ///< SSE integer functions or AMD MMX ext
#endif
#define AV_CPU_FLAG_3DNOW 0x0004 ///< AMD 3DNOW
#define AV_CPU_FLAG_SSE 0x0008 ///< SSE functions
#define AV_CPU_FLAG_SSE2 0x0010 ///< PIV SSE2 functions
......
......@@ -38,7 +38,7 @@
#define LIBAVUTIL_VERSION_MAJOR 51
#define LIBAVUTIL_VERSION_MINOR 37
#define LIBAVUTIL_VERSION_MICRO 0
#define LIBAVUTIL_VERSION_MICRO 1
#define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
LIBAVUTIL_VERSION_MINOR, \
......
......@@ -122,7 +122,7 @@ int ff_get_cpu_flags_x86(void)
if (std_caps & (1 << 23))
rval |= AV_CPU_FLAG_MMX;
if (std_caps & (1 << 25))
rval |= AV_CPU_FLAG_MMX2;
rval |= AV_CPU_FLAG_MMXEXT;
#if HAVE_SSE
if (std_caps & (1 << 25))
rval |= AV_CPU_FLAG_SSE;
......@@ -159,7 +159,7 @@ int ff_get_cpu_flags_x86(void)
if (ext_caps & (1 << 23))
rval |= AV_CPU_FLAG_MMX;
if (ext_caps & (1 << 22))
rval |= AV_CPU_FLAG_MMX2;
rval |= AV_CPU_FLAG_MMXEXT;
/* Allow for selectively disabling SSE2 functions on AMD processors
with SSE2 support but not SSE4a. This includes Athlon64, some
......
......@@ -661,8 +661,8 @@ static int swScale(SwsContext *c, const uint8_t *src[],
if (isPlanar(dstFormat) && isALPHA(dstFormat) && !alpPixBuf)
fillPlane(dst[3], dstStride[3], dstW, dstY - lastDstY, lastDstY, 255);
#if HAVE_MMX2 && HAVE_INLINE_ASM
if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
#if HAVE_MMXEXT && HAVE_INLINE_ASM
if (av_get_cpu_flags() & AV_CPU_FLAG_MMXEXT)
__asm__ volatile ("sfence" ::: "memory");
#endif
emms_c();
......
......@@ -82,7 +82,10 @@ const char *swscale_license(void);
* are only provided for API compatibility.
*/
#define SWS_CPU_CAPS_MMX 0x80000000
#define SWS_CPU_CAPS_MMXEXT 0x20000000
#if LIBSWSCALE_VERSION_MAJOR < 3
#define SWS_CPU_CAPS_MMX2 0x20000000
#endif
#define SWS_CPU_CAPS_3DNOW 0x40000000
#define SWS_CPU_CAPS_ALTIVEC 0x10000000
#define SWS_CPU_CAPS_BFIN 0x01000000
......
......@@ -577,7 +577,7 @@ fail:
return ret;
}
#if HAVE_MMX2 && HAVE_INLINE_ASM
#if HAVE_MMXEXT && HAVE_INLINE_ASM
static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode,
int16_t *filter, int32_t *filterPos, int numSplits)
{
......@@ -740,7 +740,7 @@ static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode,
return fragmentPos + 1;
}
#endif /* HAVE_MMX2 && HAVE_INLINE_ASM */
#endif /* HAVE_MMXEXT && HAVE_INLINE_ASM */
static void getSubSampleFactors(int *h, int *v, enum PixelFormat format)
{
......@@ -973,7 +973,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
FF_ALLOC_OR_GOTO(c, c->formatConvBuffer,
(FFALIGN(srcW, 16) * 2 * FFALIGN(c->srcBpc, 8) >> 3) + 16,
fail);
if (HAVE_MMX2 && HAVE_INLINE_ASM && cpu_flags & AV_CPU_FLAG_MMX2 &&
if (HAVE_MMXEXT && HAVE_INLINE_ASM && cpu_flags & AV_CPU_FLAG_MMXEXT &&
c->srcBpc == 8 && c->dstBpc <= 10) {
c->canMMX2BeUsed = (dstW >= srcW && (dstW & 31) == 0 &&
(srcW & 15) == 0) ? 1 : 0;
......@@ -1012,7 +1012,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
/* precalculate horizontal scaler filter coefficients */
{
#if HAVE_MMX2 && HAVE_INLINE_ASM
#if HAVE_MMXEXT && HAVE_INLINE_ASM
// can't downscale !!!
if (c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR)) {
c->lumMmx2FilterCodeSize = initMMX2HScaler(dstW, c->lumXInc, NULL,
......@@ -1048,7 +1048,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
mprotect(c->chrMmx2FilterCode, c->chrMmx2FilterCodeSize, PROT_EXEC | PROT_READ);
#endif
} else
#endif /* HAVE_MMX2 && HAVE_INLINE_ASM */
#endif /* HAVE_MMXEXT && HAVE_INLINE_ASM */
{
const int filterAlign =
(HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? 4 :
......@@ -1208,7 +1208,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
#endif
sws_format_name(dstFormat));
if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2)
if (HAVE_MMXEXT && cpu_flags & AV_CPU_FLAG_MMXEXT)
av_log(c, AV_LOG_INFO, "using MMX2\n");
else if (HAVE_AMD3DNOW && cpu_flags & AV_CPU_FLAG_3DNOW)
av_log(c, AV_LOG_INFO, "using 3DNOW\n");
......
......@@ -28,7 +28,7 @@
#define LIBSWSCALE_VERSION_MAJOR 2
#define LIBSWSCALE_VERSION_MINOR 1
#define LIBSWSCALE_VERSION_MICRO 0
#define LIBSWSCALE_VERSION_MICRO 1
#define LIBSWSCALE_VERSION_INT AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \
LIBSWSCALE_VERSION_MINOR, \
......
......@@ -85,7 +85,7 @@ DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL;
//Note: We have C, MMX, MMX2, 3DNOW versions, there is no 3DNOW + MMX2 one.
#define COMPILE_TEMPLATE_MMX2 0
#define COMPILE_TEMPLATE_MMXEXT 0
#define COMPILE_TEMPLATE_AMD3DNOW 0
#define COMPILE_TEMPLATE_SSE2 0
......@@ -96,8 +96,8 @@ DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL;
//MMX2 versions
#undef RENAME
#undef COMPILE_TEMPLATE_MMX2
#define COMPILE_TEMPLATE_MMX2 1
#undef COMPILE_TEMPLATE_MMXEXT
#define COMPILE_TEMPLATE_MMXEXT 1
#define RENAME(a) a ## _MMX2
#include "rgb2rgb_template.c"
......@@ -110,10 +110,10 @@ DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL;
//3DNOW versions
#undef RENAME
#undef COMPILE_TEMPLATE_MMX2
#undef COMPILE_TEMPLATE_MMXEXT
#undef COMPILE_TEMPLATE_SSE2
#undef COMPILE_TEMPLATE_AMD3DNOW
#define COMPILE_TEMPLATE_MMX2 0
#define COMPILE_TEMPLATE_MMXEXT 0
#define COMPILE_TEMPLATE_SSE2 0
#define COMPILE_TEMPLATE_AMD3DNOW 1
#define RENAME(a) a ## _3DNOW
......@@ -137,7 +137,7 @@ av_cold void rgb2rgb_init_x86(void)
rgb2rgb_init_MMX();
if (HAVE_AMD3DNOW && cpu_flags & AV_CPU_FLAG_3DNOW)
rgb2rgb_init_3DNOW();
if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2)
if (HAVE_MMXEXT && cpu_flags & AV_CPU_FLAG_MMXEXT)
rgb2rgb_init_MMX2();
if (HAVE_SSE && cpu_flags & AV_CPU_FLAG_SSE2)
rgb2rgb_init_SSE2();
......
......@@ -35,7 +35,7 @@
#if COMPILE_TEMPLATE_AMD3DNOW
#define PREFETCH "prefetch"
#define PAVGB "pavgusb"
#elif COMPILE_TEMPLATE_MMX2
#elif COMPILE_TEMPLATE_MMXEXT
#define PREFETCH "prefetchnta"
#define PAVGB "pavgb"
#else
......@@ -49,7 +49,7 @@
#define EMMS "emms"
#endif
#if COMPILE_TEMPLATE_MMX2
#if COMPILE_TEMPLATE_MMXEXT
#define MOVNTQ "movntq"
#define SFENCE "sfence"