Commit 26301caa authored by Diego Biurrun

x86: mmx2 ---> mmxext in asm constructs

parent da39cac8
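
The change below is a pure spelling alignment in the yasm sources: with the x86inc.asm framework, the CPU-flag token passed to INIT_MMX is both appended as a suffix to every following cglobal symbol and tested by cpuflag(), so the same name ("mmxext") has to be used in both places. A minimal sketch of that convention, using a hypothetical broadcast_word function that is not part of this commit:

; Hypothetical illustration only -- not code from this commit.
; INIT_MMX mmxext selects MMX registers, suffixes cglobal names
; with _mmxext, and makes cpuflag(mmxext) tests evaluate true.
INIT_MMX mmxext
cglobal broadcast_word, 1, 1, 0, src ; assembles as ff_broadcast_word_mmxext
    movd      m0, [srcq]             ; load src[0..1] into the low dword
%if cpuflag(mmxext)
    pshufw    m0, m0, 0              ; pshufw is an MMXEXT instruction
%else
    punpcklwd m0, m0                 ; plain-MMX fallback: duplicate word 0
    punpckldq m0, m0
%endif
    RET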
@@ -97,7 +97,7 @@ AC3_EXPONENT_MIN
     por      %1, %2
     pshuflw  %2, %1, q0001
     por      %1, %2
-%elif cpuflag(mmx2)
+%elif cpuflag(mmxext)
     pshufw   %2, %1, q0032
     por      %1, %2
     pshufw   %2, %1, q0001
@@ -153,7 +153,7 @@ cglobal ac3_max_msb_abs_int16, 2,2,5, src, len
 INIT_MMX mmx
 %define ABS2 ABS2_MMX
 AC3_MAX_MSB_ABS_INT16 or_abs
-INIT_MMX mmx2
+INIT_MMX mmxext
 %define ABS2 ABS2_MMXEXT
 AC3_MAX_MSB_ABS_INT16 min_max
 INIT_XMM sse2
......
@@ -31,7 +31,7 @@ extern void ff_ac3_exponent_min_mmxext(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
 extern void ff_ac3_exponent_min_sse2 (uint8_t *exp, int num_reuse_blocks, int nb_coefs);
 extern int ff_ac3_max_msb_abs_int16_mmx (const int16_t *src, int len);
-extern int ff_ac3_max_msb_abs_int16_mmx2 (const int16_t *src, int len);
+extern int ff_ac3_max_msb_abs_int16_mmxext(const int16_t *src, int len);
 extern int ff_ac3_max_msb_abs_int16_sse2 (const int16_t *src, int len);
 extern int ff_ac3_max_msb_abs_int16_ssse3(const int16_t *src, int len);
@@ -182,7 +182,7 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
     }
     if (EXTERNAL_MMXEXT(mm_flags)) {
         c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
-        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx2;
+        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmxext;
     }
     if (EXTERNAL_SSE(mm_flags)) {
         c->float_to_fixed24 = ff_float_to_fixed24_sse;
......
@@ -108,7 +108,7 @@ cglobal scalarproduct_and_madd_int16_%1, 4,4,8, v1, v2, v3, order, mul
 %endmacro
 INIT_MMX
-SCALARPRODUCT mmx2
+SCALARPRODUCT mmxext
 INIT_XMM
 SCALARPRODUCT sse2
@@ -327,8 +327,8 @@ APPLY_WINDOW_INT16 ssse3_atom, 0, 1
 APPLY_WINDOW_INT16 ssse3, 0, 1
-; void add_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top)
-cglobal add_hfyu_median_prediction_mmx2, 6,6,0, dst, top, diff, w, left, left_top
+; void add_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top)
+cglobal add_hfyu_median_prediction_mmxext, 6,6,0, dst, top, diff, w, left, left_top
     movq    mm0, [topq]
     movq    mm2, mm0
     movd    mm4, [left_topq]
@@ -804,7 +804,7 @@ ALIGN 128
     mov     valh, vall
 %if %1 >= 8
     movd    mm0, vald
-%if cpuflag(mmx2)
+%if cpuflag(mmxext)
     pshufw  mm0, mm0, 0
 %else ; mmx
     punpcklwd mm0, mm0
......
@@ -2045,21 +2045,21 @@ PREFETCH(prefetch_3dnow, prefetch)
 void ff_put_h264_chroma_mc8_rnd_mmx  (uint8_t *dst, uint8_t *src,
                                       int stride, int h, int x, int y);
-void ff_avg_h264_chroma_mc8_rnd_mmx2 (uint8_t *dst, uint8_t *src,
-                                      int stride, int h, int x, int y);
+void ff_avg_h264_chroma_mc8_rnd_mmxext(uint8_t *dst, uint8_t *src,
+                                       int stride, int h, int x, int y);
 void ff_avg_h264_chroma_mc8_rnd_3dnow(uint8_t *dst, uint8_t *src,
                                       int stride, int h, int x, int y);
 void ff_put_h264_chroma_mc4_mmx      (uint8_t *dst, uint8_t *src,
                                       int stride, int h, int x, int y);
-void ff_avg_h264_chroma_mc4_mmx2     (uint8_t *dst, uint8_t *src,
+void ff_avg_h264_chroma_mc4_mmxext   (uint8_t *dst, uint8_t *src,
                                       int stride, int h, int x, int y);
 void ff_avg_h264_chroma_mc4_3dnow    (uint8_t *dst, uint8_t *src,
                                       int stride, int h, int x, int y);
-void ff_put_h264_chroma_mc2_mmx2     (uint8_t *dst, uint8_t *src,
+void ff_put_h264_chroma_mc2_mmxext   (uint8_t *dst, uint8_t *src,
                                       int stride, int h, int x, int y);
-void ff_avg_h264_chroma_mc2_mmx2     (uint8_t *dst, uint8_t *src,
+void ff_avg_h264_chroma_mc2_mmxext   (uint8_t *dst, uint8_t *src,
                                       int stride, int h, int x, int y);
 void ff_put_h264_chroma_mc8_rnd_ssse3(uint8_t *dst, uint8_t *src,
@@ -2077,10 +2077,10 @@ void ff_ ## OP ## _h264_chroma_mc ## NUM ## _ ## DEPTH ## _ ## OPT \
                              (uint8_t *dst, uint8_t *src, \
                               int stride, int h, int x, int y);
-CHROMA_MC(put, 2, 10, mmx2)
-CHROMA_MC(avg, 2, 10, mmx2)
-CHROMA_MC(put, 4, 10, mmx2)
-CHROMA_MC(avg, 4, 10, mmx2)
+CHROMA_MC(put, 2, 10, mmxext)
+CHROMA_MC(avg, 2, 10, mmxext)
+CHROMA_MC(put, 4, 10, mmxext)
+CHROMA_MC(avg, 4, 10, mmxext)
 CHROMA_MC(put, 8, 10, sse2)
 CHROMA_MC(avg, 8, 10, sse2)
 CHROMA_MC(put, 8, 10, avx)
@@ -2283,13 +2283,13 @@ static void vector_clipf_sse(float *dst, const float *src,
 #endif /* HAVE_INLINE_ASM */
-int32_t ff_scalarproduct_int16_mmx2(const int16_t *v1, const int16_t *v2,
-                                    int order);
+int32_t ff_scalarproduct_int16_mmxext(const int16_t *v1, const int16_t *v2,
+                                      int order);
 int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2,
                                     int order);
-int32_t ff_scalarproduct_and_madd_int16_mmx2(int16_t *v1, const int16_t *v2,
-                                             const int16_t *v3,
-                                             int order, int mul);
+int32_t ff_scalarproduct_and_madd_int16_mmxext(int16_t *v1, const int16_t *v2,
+                                               const int16_t *v3,
+                                               int order, int mul);
 int32_t ff_scalarproduct_and_madd_int16_sse2(int16_t *v1, const int16_t *v2,
                                              const int16_t *v3,
                                              int order, int mul);
@@ -2313,9 +2313,9 @@ void ff_apply_window_int16_ssse3_atom(int16_t *output, const int16_t *input,
 void ff_bswap32_buf_ssse3(uint32_t *dst, const uint32_t *src, int w);
 void ff_bswap32_buf_sse2(uint32_t *dst, const uint32_t *src, int w);
-void ff_add_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *top,
-                                        const uint8_t *diff, int w,
-                                        int *left, int *left_top);
+void ff_add_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *top,
+                                          const uint8_t *diff, int w,
+                                          int *left, int *left_top);
 int ff_add_hfyu_left_prediction_ssse3(uint8_t *dst, const uint8_t *src,
                                       int w, int left);
 int ff_add_hfyu_left_prediction_sse4(uint8_t *dst, const uint8_t *src,
@@ -2548,24 +2548,24 @@ static void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx,
 #if HAVE_YASM
     if (!high_bit_depth && CONFIG_H264CHROMA) {
-        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_mmx2;
-        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_mmx2;
-        c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_mmx2;
-        c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_mmx2;
+        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_mmxext;
+        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_mmxext;
+        c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_mmxext;
+        c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_mmxext;
     }
     if (bit_depth == 10 && CONFIG_H264CHROMA) {
-        c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_10_mmx2;
-        c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_10_mmx2;
-        c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_10_mmx2;
-        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_10_mmx2;
+        c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_10_mmxext;
+        c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_10_mmxext;
+        c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_10_mmxext;
+        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_10_mmxext;
     }
     /* slower than cmov version on AMD */
     if (!(mm_flags & AV_CPU_FLAG_3DNOW))
-        c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2;
+        c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmxext;
-    c->scalarproduct_int16          = ff_scalarproduct_int16_mmx2;
-    c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_mmx2;
+    c->scalarproduct_int16          = ff_scalarproduct_int16_mmxext;
+    c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_mmxext;
     if (avctx->flags & CODEC_FLAG_BITEXACT) {
         c->apply_window_int16 = ff_apply_window_int16_mmxext_ba;
......
@@ -265,7 +265,7 @@ HADAMARD8_DIFF_MMX mmx
 %define ABS1 ABS1_MMXEXT
 %define HSUM HSUM_MMXEXT
-HADAMARD8_DIFF_MMX mmx2
+HADAMARD8_DIFF_MMX mmxext
 INIT_XMM
 %define ABS2 ABS2_MMXEXT
......
@@ -1104,7 +1104,7 @@ int ff_hadamard8_diff16_##cpu(void *s, uint8_t *src1, uint8_t *src2, \
                               int stride, int h);
 hadamard_func(mmx)
-hadamard_func(mmx2)
+hadamard_func(mmxext)
 hadamard_func(sse2)
 hadamard_func(ssse3)
@@ -1195,8 +1195,8 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
         c->hadamard8_diff[1] = ff_hadamard8_diff_mmx;
         if (EXTERNAL_MMXEXT(mm_flags)) {
-            c->hadamard8_diff[0] = ff_hadamard8_diff16_mmx2;
-            c->hadamard8_diff[1] = ff_hadamard8_diff_mmx2;
+            c->hadamard8_diff[0] = ff_hadamard8_diff16_mmxext;
+            c->hadamard8_diff[1] = ff_hadamard8_diff_mmxext;
         }
         if (EXTERNAL_SSE2(mm_flags)) {
......
@@ -442,17 +442,17 @@ chroma_mc8_mmx_func put, vc1, nornd_mmx
 chroma_mc8_mmx_func put, rv40, mmx
 chroma_mc4_mmx_func put, h264, mmx
 chroma_mc4_mmx_func put, rv40, mmx
-chroma_mc2_mmx_func put, h264, mmx2
+chroma_mc2_mmx_func put, h264, mmxext
 %define CHROMAMC_AVG DIRECT_AVG
 %define CHROMAMC_AVG4 COPY_AVG
 %define PAVG pavgb
-chroma_mc8_mmx_func avg, h264, rnd_mmx2
-chroma_mc8_mmx_func avg, vc1, nornd_mmx2
-chroma_mc8_mmx_func avg, rv40, mmx2
-chroma_mc4_mmx_func avg, h264, mmx2
-chroma_mc4_mmx_func avg, rv40, mmx2
-chroma_mc2_mmx_func avg, h264, mmx2
+chroma_mc8_mmx_func avg, h264, rnd_mmxext
+chroma_mc8_mmx_func avg, vc1, nornd_mmxext
+chroma_mc8_mmx_func avg, rv40, mmxext
+chroma_mc4_mmx_func avg, h264, mmxext
+chroma_mc4_mmx_func avg, rv40, mmxext
+chroma_mc2_mmx_func avg, h264, mmxext
 %define PAVG pavgusb
 chroma_mc8_mmx_func avg, h264, rnd_3dnow
......
@@ -253,7 +253,7 @@ INIT_XMM sse2
 CHROMA_MC8 put
 INIT_XMM avx
 CHROMA_MC8 put
-INIT_MMX mmx2
+INIT_MMX mmxext
 CHROMA_MC4 put
 CHROMA_MC2 put
@@ -262,6 +262,6 @@ INIT_XMM sse2
 CHROMA_MC8 avg
 INIT_XMM avx
 CHROMA_MC8 avg
-INIT_MMX mmx2
+INIT_MMX mmxext
 CHROMA_MC4 avg
 CHROMA_MC2 avg
@@ -504,7 +504,7 @@ cglobal deblock_h_luma_8, 0,5
     RET
 %endmacro ; DEBLOCK_LUMA
-INIT_MMX mmx2
+INIT_MMX mmxext
 DEBLOCK_LUMA v8, 8
 INIT_XMM sse2
 DEBLOCK_LUMA v, 16
@@ -783,11 +783,11 @@ DEBLOCK_LUMA_INTRA v
 INIT_XMM avx
 DEBLOCK_LUMA_INTRA v
 %if ARCH_X86_64 == 0
-INIT_MMX mmx2
+INIT_MMX mmxext
 DEBLOCK_LUMA_INTRA v8
 %endif
-INIT_MMX mmx2
+INIT_MMX mmxext
 %macro CHROMA_V_START 0
     dec r2d ; alpha-1
@@ -818,7 +818,7 @@ cglobal deblock_v_chroma_8, 5,6
     movq m1, [t5+r1]
     movq m2, [r0]
     movq m3, [r0+r1]
-    call ff_chroma_inter_body_mmx2
+    call ff_chroma_inter_body_mmxext
     movq [t5+r1], m1
     movq [r0], m2
     RET
@@ -842,7 +842,7 @@ cglobal deblock_h_chroma_8, 5,7
     TRANSPOSE4x8_LOAD bw, wd, dq, PASS8ROWS(t5, r0, r1, t6)
     movq buf0, m0
     movq buf1, m3
-    call ff_chroma_inter_body_mmx2
+    call ff_chroma_inter_body_mmxext
     movq m0, buf0
     movq m3, buf1
     TRANSPOSE8x4B_STORE PASS8ROWS(t5, r0, r1, t6)
@@ -852,7 +852,7 @@ cglobal deblock_h_chroma_8, 5,7
     RET
 ALIGN 16
-ff_chroma_inter_body_mmx2:
+ff_chroma_inter_body_mmxext:
     LOAD_MASK r2d, r3d
     movd m6, [r4] ; tc0
     punpcklbw m6, m6
@@ -885,7 +885,7 @@ cglobal deblock_v_chroma_intra_8, 4,5
     movq m1, [t5+r1]
     movq m2, [r0]
     movq m3, [r0+r1]
-    call ff_chroma_intra_body_mmx2
+    call ff_chroma_intra_body_mmxext
     movq [t5+r1], m1
     movq [r0], m2
     RET
@@ -896,12 +896,12 @@ cglobal deblock_v_chroma_intra_8, 4,5
 cglobal deblock_h_chroma_intra_8, 4,6
     CHROMA_H_START
     TRANSPOSE4x8_LOAD bw, wd, dq, PASS8ROWS(t5, r0, r1, t6)
-    call ff_chroma_intra_body_mmx2
+    call ff_chroma_intra_body_mmxext
     TRANSPOSE8x4B_STORE PASS8ROWS(t5, r0, r1, t6)
     RET
 ALIGN 16
-ff_chroma_intra_body_mmx2:
+ff_chroma_intra_body_mmxext:
     LOAD_MASK r2d, r3d
     movq m5, m1
     movq m6, m2
@@ -1025,7 +1025,7 @@ ff_chroma_intra_body_mmx2:
     jl %%.b_idx_loop
 %endmacro
-INIT_MMX mmx2
+INIT_MMX mmxext
 cglobal h264_loop_filter_strength, 9, 9, 0, bs, nnz, ref, mv, bidir, edges, \
                                    step, mask_mv0, mask_mv1, field
 %define b_idxq bidirq
......
@@ -791,7 +791,7 @@ cglobal deblock_h_luma_intra_10, 4,7,8*(mmsize/16)
 %endmacro
 %if ARCH_X86_64 == 0
-INIT_MMX mmx2
+INIT_MMX mmxext
 DEBLOCK_LUMA
 DEBLOCK_LUMA_INTRA
 INIT_XMM sse2
@@ -906,7 +906,7 @@ cglobal deblock_v_chroma_intra_10, 4,6-(mmsize/16),8*(mmsize/16)
 %endmacro
 %if ARCH_X86_64 == 0
-INIT_MMX mmx2
+INIT_MMX mmxext
 DEBLOCK_CHROMA
 %endif
 INIT_XMM sse2
......
@@ -286,14 +286,14 @@ cglobal h264_idct8_add_8_sse2, 3, 4, 10
 %endmacro
 INIT_MMX
-; ff_h264_idct_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride)
-cglobal h264_idct_dc_add_8_mmx2, 3, 3, 0
+; ff_h264_idct_dc_add_mmxext(uint8_t *dst, int16_t *block, int stride)
+cglobal h264_idct_dc_add_8_mmxext, 3, 3, 0
     DC_ADD_MMXEXT_INIT r1, r2
     DC_ADD_MMXEXT_OP movh, r0, r2, r1
     RET
-; ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride)
-cglobal h264_idct8_dc_add_8_mmx2, 3, 3, 0
+; ff_h264_idct8_dc_add_mmxext(uint8_t *dst, int16_t *block, int stride)
+cglobal h264_idct8_dc_add_8_mmxext, 3, 3, 0
     DC_ADD_MMXEXT_INIT r1, r2
     DC_ADD_MMXEXT_OP mova, r0, r2, r1
     lea r0, [r0+r2*4]
@@ -354,9 +354,9 @@ cglobal h264_idct8_add4_8_mmx, 5, 7 + npicregs, 0, dst, block_offset, block, str
     ADD rsp, pad
     RET
-; ff_h264_idct_add16_mmx2(uint8_t *dst, const int *block_offset,
-;                         DCTELEM *block, int stride, const uint8_t nnzc[6*8])
-cglobal h264_idct_add16_8_mmx2, 5, 8 + npicregs, 0, dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg
+; ff_h264_idct_add16_mmxext(uint8_t *dst, const int *block_offset,
+;                           DCTELEM *block, int stride, const uint8_t nnzc[6*8])
+cglobal h264_idct_add16_8_mmxext, 5, 8 + npicregs, 0, dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg
     xor r5, r5
 %ifdef PIC
     lea picregq, [scan8_mem]
@@ -421,9 +421,10 @@ cglobal h264_idct_add16intra_8_mmx, 5, 7 + npicregs, 0, dst, block_offset, block
     jl .nextblock
     REP_RET
-; ff_h264_idct_add16intra_mmx2(uint8_t *dst, const int *block_offset,
-;                              DCTELEM *block, int stride, const uint8_t nnzc[6*8])
-cglobal h264_idct_add16intra_8_mmx2, 5, 8 + npicregs, 0, dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg
+; ff_h264_idct_add16intra_mmxext(uint8_t *dst, const int *block_offset,
+;                                DCTELEM *block, int stride,
+;                                const uint8_t nnzc[6*8])
+cglobal h264_idct_add16intra_8_mmxext, 5, 8 + npicregs, 0, dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg
     xor r5, r5
 %ifdef PIC
     lea picregq, [scan8_mem]
@@ -463,9 +464,10 @@ cglobal h264_idct_add16intra_8_mmx2, 5, 8 + npicregs, 0, dst1, block_offset, blo
     jl .nextblock
     REP_RET
-; ff_h264_idct8_add4_mmx2(uint8_t *dst, const int *block_offset,
-;                         DCTELEM *block, int stride, const uint8_t nnzc[6*8])
-cglobal h264_idct8_add4_8_mmx2, 5, 8 + npicregs, 0, dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg
+; ff_h264_idct8_add4_mmxext(uint8_t *dst, const int *block_offset,
+;                           DCTELEM *block, int stride,
+;                           const uint8_t nnzc[6*8])
+cglobal h264_idct8_add4_8_mmxext, 5, 8 + npicregs, 0, dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg
 %assign pad 128+4-(stack_offset&7)
     SUB rsp, pad
@@ -620,7 +622,7 @@ cglobal h264_idct_add8_8_mmx, 5, 8 + npicregs, 0, dst1, block_offset, block, str
     call h264_idct_add8_mmx_plane
     RET
-h264_idct_add8_mmx2_plane:
+h264_idct_add8_mmxext_plane:
 .nextblock:
     movzx r6, byte [scan8+r5]
     movzx r6, byte [r4+r6]
@@ -661,9 +663,9 @@ h264_idct_add8_mmx2_plane:
     jnz .nextblock
     rep ret
-; ff_h264_idct_add8_mmx2(uint8_t **dest, const int *block_offset,
-;                        DCTELEM *block, int stride, const uint8_t nnzc[6*8])
-cglobal h264_idct_add8_8_mmx2, 5, 8 + npicregs, 0, dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg
+; ff_h264_idct_add8_mmxext(uint8_t **dest, const int *block_offset,
+;                          DCTELEM *block, int stride, const uint8_t nnzc[6*8])
+cglobal h264_idct_add8_8_mmxext, 5, 8 + npicregs, 0, dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg
     mov r5, 16
     add r2, 512
 %if ARCH_X86_64
@@ -672,7 +674,7 @@ cglobal h264_idct_add8_8_mmx2, 5, 8 + npicregs, 0, dst1, block_offset, block, st
 %ifdef PIC
     lea picregq, [scan8_mem]
 %endif
-    call h264_idct_add8_mmx2_plane
+    call h264_idct_add8_mmxext_plane
     mov r5, 32
     add r2, 384
 %if ARCH_X86_64
@@ -680,12 +682,12 @@ cglobal h264_idct_add8_8_mmx2, 5, 8 + npicregs, 0, dst1, block_offset, block, st
 %else
     add r0mp, gprsize
 %endif
-    call h264_idct_add8_mmx2_plane
+    call h264_idct_add8_mmxext_plane
     RET
 INIT_MMX
 ; r0 = uint8_t *dst, r2 = int16_t *block, r3 = int stride, r6=clobbered
-h264_idct_dc_add8_mmx2:
+h264_idct_dc_add8_mmxext:
     movd m0, [r2 ] ; 0 0 X D
     punpcklwd m0, [r2+32] ; x X d D
     paddsw m0, [pw_32]
@@ -779,7 +781,7 @@ cglobal h264_idct_add16_8_sse2, 5, 5 + ARCH_X86_64, 8
 %else
     add r0, r0m
 %endif
-    call h264_idct_dc_add8_mmx2
+    call h264_idct_dc_add8_mmxext
 .cycle%1end:
 %if %1 < 7
     add r2, 64
@@ -828,7 +830,7 @@ cglobal h264_idct_add16intra_8_sse2, 5, 7 + ARCH_X86_64, 8
     mov r0, [r0]
     add r0, dword [r1+(%1&1)*8+64*(1+(%1>>1))]
 %endif
-    call h264_idct_dc_add8_mmx2
+    call h264_idct_dc_add8_mmxext
 .cycle%1end:
 %if %1 == 1
     add r2, 384+64
......
@@ -178,7 +178,7 @@ IDCT_ADD16_10
     mova [%1+%3 ], m4
 %endmacro
-INIT_MMX mmx2
+INIT_MMX mmxext
 cglobal h264_idct_dc_add_10,3,3
     movd m0, [r1]
     paddd m0, [pd_32]
......
@@ -120,7 +120,7 @@ cglobal pred16x16_horizontal_8, 2,3
 INIT_MMX mmx
 PRED16x16_H
-INIT_MMX mmx2
+INIT_MMX mmxext
 PRED16x16_H
 INIT_XMM ssse3
 PRED16x16_H
@@ -180,7 +180,7 @@ cglobal pred16x16_dc_8, 2,7
     REP_RET
 %endmacro
-INIT_MMX mmx2
+INIT_MMX mmxext
 PRED16x16_DC
 INIT_XMM sse2
 PRED16x16_DC
@@ -229,7 +229,7 @@ cglobal pred16x16_tm_vp8_8, 2,5
 INIT_MMX mmx
 PRED16x16_TM
-INIT_MMX mmx2
+INIT_MMX mmxext
 PRED16x16_TM
 INIT_XMM sse2
@@ -309,14 +309,14 @@ cglobal pred16x16_plane_%1_8, 2,9,7
     movhlps m1, m0
 %endif
     paddw m0, m1
-%if cpuflag(mmx2)
+%if cpuflag(mmxext)
     PSHUFLW m1, m0, 0xE
 %elif cpuflag(mmx)
     mova m1, m0
     psrlq m1, 32
 %endif
     paddw m0, m1
-%if cpuflag(mmx2)
+%if cpuflag(mmxext)
     PSHUFLW m1, m0, 0x1
 %elif cpuflag(mmx)
     mova m1, m0
@@ -536,7 +536,7 @@ INIT_MMX mmx
 H264_PRED16x16_PLANE h264
 H264_PRED16x16_PLANE rv40
 H264_PRED16x16_PLANE svq3
-INIT_MMX mmx2
+INIT_MMX mmxext
 H264_PRED16x16_PLANE h264
 H264_PRED16x16_PLANE rv40
 H264_PRED16x16_PLANE svq3
@@ -582,7 +582,7 @@ cglobal pred8x8_plane_8, 2,9,7
     paddw m0, m1
 %if notcpuflag(ssse3)
-%if cpuflag(mmx2)
+%if cpuflag(mmxext)
     PSHUFLW m1, m0, 0xE
 %elif cpuflag(mmx)
     mova m1, m0
@@ -591,7 +591,7 @@ cglobal pred8x8_plane_8, 2,9,7
     paddw m0, m1
 %endif ; !ssse3
-%if cpuflag(mmx2)
+%if cpuflag(mmxext)
     PSHUFLW m1, m0, 0x1
 %elif cpuflag(mmx)
     mova m1, m0
@@ -716,7 +716,7 @@ ALIGN 16
 INIT_MMX mmx
 H264_PRED8x8_PLANE
-INIT_MMX mmx2
+INIT_MMX mmxext
 H264_PRED8x8_PLANE
 INIT_XMM sse2
 H264_PRED8x8_PLANE
@@ -763,7 +763,7 @@ cglobal pred8x8_horizontal_8, 2,3
 INIT_MMX mmx
 PRED8x8_H
-INIT_MMX mmx2
+INIT_MMX mmxext
 PRED8x8_H
 INIT_MMX ssse3
 PRED8x8_H
@@ -941,7 +941,7 @@ cglobal pred8x8_tm_vp8_8, 2,6
 INIT_MMX mmx
 PRED8x8_TM
-INIT_MMX mmx2
+INIT_MMX mmxext
 PRED8x8_TM
 INIT_XMM sse2
@@ -2442,7 +2442,7 @@ cglobal pred4x4_tm_vp8_8, 3,6
     sub r3d, r4d
     movd mm2, r1d
     movd mm4, r3d
-%if cpuflag(mmx2)
+%if cpuflag(mmxext)
     pshufw mm2, mm2, 0
     pshufw mm4, mm4, 0
 %else
@@ -2465,7 +2465,7 @@ cglobal pred4x4_tm_vp8_8, 3,6
 INIT_MMX mmx
 PRED4x4_TM
-INIT_MMX mmx2
+INIT_MMX mmxext
 PRED4x4_TM
 INIT_XMM ssse3
......
@@ -182,7 +182,7 @@ PRED4x4_HD
     HADDD %1, %2
 %endmacro
-INIT_MMX mmx2
+INIT_MMX mmxext
 cglobal pred4x4_dc_10, 3, 3
     sub r0, r2
     lea r1, [r0+r2*2]
@@ -261,7 +261,7 @@ PRED4x4_VL
 ;-----------------------------------------------------------------------------
 ; void pred4x4_horizontal_up(pixel *src, const pixel *topright, int stride)
 ;-----------------------------------------------------------------------------
-INIT_MMX mmx2
+INIT_MMX mmxext
 cglobal pred4x4_horizontal_up_10, 3, 3
     sub r0, r2
     lea r1, [r0+r2*2]
@@ -410,7 +410,7 @@ cglobal pred8x8_dc_10, 2, 6
     RET