Commit 983260b0 authored by Ronald S. Bultje's avatar Ronald S. Bultje
Browse files

swscale: split out x86/swscale_template.c from swscale.c.

parent 075d0ae7
......@@ -15,6 +15,7 @@ OBJS-$(HAVE_ALTIVEC) += ppc/swscale_altivec.o \
ppc/yuv2rgb_altivec.o \
ppc/yuv2yuv_altivec.o
OBJS-$(HAVE_MMX) += x86/rgb2rgb.o \
x86/swscale_mmx.o \
x86/yuv2rgb_mmx.o
OBJS-$(HAVE_VIS) += sparc/yuv2rgb_vis.o
......
......@@ -61,16 +61,12 @@ untested special converters
#include "swscale_internal.h"
#include "rgb2rgb.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/x86_cpu.h"
#include "libavutil/cpu.h"
#include "libavutil/avutil.h"
#include "libavutil/mathematics.h"
#include "libavutil/bswap.h"
#include "libavutil/pixdesc.h"
#undef MOVNTQ
#undef PAVGB
#define DITHER1XBPP
#define isPacked(x) ( \
......@@ -1182,45 +1178,14 @@ static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, int width, uint
}
}
//Note: we have C, MMX, MMX2, 3DNOW versions, there is no 3DNOW+MMX2 one
//Plain C versions
#define COMPILE_TEMPLATE_MMX2 0
#include "swscale_template.c"
//MMX versions
#if HAVE_MMX
#undef RENAME
#undef COMPILE_TEMPLATE_MMX2
#define COMPILE_TEMPLATE_MMX2 0
#define RENAME(a) a ## _MMX
#include "x86/swscale_template.c"
#endif
//MMX2 versions
#if HAVE_MMX2
#undef RENAME
#undef COMPILE_TEMPLATE_MMX2
#define COMPILE_TEMPLATE_MMX2 1
#define RENAME(a) a ## _MMX2
#include "x86/swscale_template.c"
#endif
SwsFunc ff_getSwsFunc(SwsContext *c)
{
int cpu_flags = av_get_cpu_flags();
sws_init_swScale_c(c);
#if HAVE_MMX
if (cpu_flags & AV_CPU_FLAG_MMX)
sws_init_swScale_MMX(c);
#endif
#if HAVE_MMX2
if (cpu_flags & AV_CPU_FLAG_MMX2)
sws_init_swScale_MMX2(c);
#endif
if (HAVE_MMX)
ff_sws_init_swScale_mmx(c);
if (HAVE_ALTIVEC)
ff_sws_init_swScale_altivec(c);
......
......@@ -329,6 +329,9 @@ int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4],
void ff_yuv2rgb_init_tables_altivec(SwsContext *c, const int inv_table[4],
int brightness, int contrast, int saturation);
void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex,
int lastInLumBuf, int lastInChrBuf);
SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c);
SwsFunc ff_yuv2rgb_init_vis(SwsContext *c);
SwsFunc ff_yuv2rgb_init_mlib(SwsContext *c);
......@@ -483,5 +486,6 @@ void ff_swscale_get_unscaled_altivec(SwsContext *c);
SwsFunc ff_getSwsFunc(SwsContext *c);
void ff_sws_init_swScale_altivec(SwsContext *c);
void ff_sws_init_swScale_mmx(SwsContext *c);
#endif /* SWSCALE_SWSCALE_INTERNAL_H */
......@@ -467,11 +467,6 @@ inline static void hcscale_c(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int
#define DEBUG_SWSCALE_BUFFERS 0
#define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
#if HAVE_MMX
static void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex,
int lastInLumBuf, int lastInChrBuf);
#endif
static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[])
{
......
......@@ -18,9 +18,14 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef SWSCALE_X86_SWSCALE_TEMPLATE_H
#define SWSCALE_X86_SWSCALE_TEMPLATE_H
#include <inttypes.h>
#include "config.h"
#include "libswscale/swscale.h"
#include "libswscale/swscale_internal.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/x86_cpu.h"
#include "libavutil/cpu.h"
#include "libavutil/pixdesc.h"
DECLARE_ASM_CONST(8, uint64_t, bF8)= 0xF8F8F8F8F8F8F8F8LL;
DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL;
......@@ -32,12 +37,12 @@ DECLARE_ASM_CONST(8, uint64_t, bm11111000)=0xFFFFFFFFFF000000LL;
DECLARE_ASM_CONST(8, uint64_t, bm01010101)=0x00FF00FF00FF00FFLL;
const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
0x0103010301030103LL,
0x0200020002000200LL,};
0x0103010301030103LL,
0x0200020002000200LL,};
const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
0x0602060206020602LL,
0x0004000400040004LL,};
0x0602060206020602LL,
0x0004000400040004LL,};
DECLARE_ASM_CONST(8, uint64_t, b16Mask)= 0x001F001F001F001FLL;
DECLARE_ASM_CONST(8, uint64_t, g16Mask)= 0x07E007E007E007E0LL;
......@@ -76,4 +81,107 @@ DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUV)[2][4] = {
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUVOffset)= 0x0040400000404000ULL;
#endif /* SWSCALE_X86_SWSCALE_TEMPLATE_H */
//MMX versions
#if HAVE_MMX
#undef RENAME
#define COMPILE_TEMPLATE_MMX2 0
#define RENAME(a) a ## _MMX
#include "swscale_template.c"
#endif
//MMX2 versions
#if HAVE_MMX2
#undef RENAME
#undef COMPILE_TEMPLATE_MMX2
#define COMPILE_TEMPLATE_MMX2 1
#define RENAME(a) a ## _MMX2
#include "swscale_template.c"
#endif
void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex,
int lastInLumBuf, int lastInChrBuf)
{
const int dstH= c->dstH;
const int flags= c->flags;
int16_t **lumPixBuf= c->lumPixBuf;
int16_t **chrUPixBuf= c->chrUPixBuf;
int16_t **alpPixBuf= c->alpPixBuf;
const int vLumBufSize= c->vLumBufSize;
const int vChrBufSize= c->vChrBufSize;
int16_t *vLumFilterPos= c->vLumFilterPos;
int16_t *vChrFilterPos= c->vChrFilterPos;
int16_t *vLumFilter= c->vLumFilter;
int16_t *vChrFilter= c->vChrFilter;
int32_t *lumMmxFilter= c->lumMmxFilter;
int32_t *chrMmxFilter= c->chrMmxFilter;
int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
const int vLumFilterSize= c->vLumFilterSize;
const int vChrFilterSize= c->vChrFilterSize;
const int chrDstY= dstY>>c->chrDstVSubSample;
const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
c->blueDither= ff_dither8[dstY&1];
if (c->dstFormat == PIX_FMT_RGB555 || c->dstFormat == PIX_FMT_BGR555)
c->greenDither= ff_dither8[dstY&1];
else
c->greenDither= ff_dither4[dstY&1];
c->redDither= ff_dither8[(dstY+1)&1];
if (dstY < dstH - 2) {
const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
int i;
if (flags & SWS_ACCURATE_RND) {
int s= APCK_SIZE / 8;
for (i=0; i<vLumFilterSize; i+=2) {
*(const void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ];
*(const void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)];
lumMmxFilter[s*i+APCK_COEF/4 ]=
lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ]
+ (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
*(const void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ];
*(const void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)];
alpMmxFilter[s*i+APCK_COEF/4 ]=
alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4 ];
}
}
for (i=0; i<vChrFilterSize; i+=2) {
*(const void**)&chrMmxFilter[s*i ]= chrUSrcPtr[i ];
*(const void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrUSrcPtr[i+(vChrFilterSize>1)];
chrMmxFilter[s*i+APCK_COEF/4 ]=
chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i ]
+ (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
}
} else {
for (i=0; i<vLumFilterSize; i++) {
*(const void**)&lumMmxFilter[4*i+0]= lumSrcPtr[i];
lumMmxFilter[4*i+2]=
lumMmxFilter[4*i+3]=
((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
*(const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i];
alpMmxFilter[4*i+2]=
alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
}
}
for (i=0; i<vChrFilterSize; i++) {
*(const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i];
chrMmxFilter[4*i+2]=
chrMmxFilter[4*i+3]=
((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;
}
}
}
}
void ff_sws_init_swScale_mmx(SwsContext *c)
{
int cpu_flags = av_get_cpu_flags();
if (cpu_flags & AV_CPU_FLAG_MMX)
sws_init_swScale_MMX(c);
if (cpu_flags & AV_CPU_FLAG_MMX2)
sws_init_swScale_MMX2(c);
}
......@@ -18,8 +18,6 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "swscale_template.h"
#undef REAL_MOVNTQ
#undef MOVNTQ
#undef PREFETCH
......@@ -2185,86 +2183,6 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *d
}
#endif /* COMPILE_TEMPLATE_MMX2 */
#if !COMPILE_TEMPLATE_MMX2
static void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex,
int lastInLumBuf, int lastInChrBuf)
{
const int dstH= c->dstH;
const int flags= c->flags;
int16_t **lumPixBuf= c->lumPixBuf;
int16_t **chrUPixBuf= c->chrUPixBuf;
int16_t **alpPixBuf= c->alpPixBuf;
const int vLumBufSize= c->vLumBufSize;
const int vChrBufSize= c->vChrBufSize;
int16_t *vLumFilterPos= c->vLumFilterPos;
int16_t *vChrFilterPos= c->vChrFilterPos;
int16_t *vLumFilter= c->vLumFilter;
int16_t *vChrFilter= c->vChrFilter;
int32_t *lumMmxFilter= c->lumMmxFilter;
int32_t *chrMmxFilter= c->chrMmxFilter;
int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
const int vLumFilterSize= c->vLumFilterSize;
const int vChrFilterSize= c->vChrFilterSize;
const int chrDstY= dstY>>c->chrDstVSubSample;
const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
c->blueDither= ff_dither8[dstY&1];
if (c->dstFormat == PIX_FMT_RGB555 || c->dstFormat == PIX_FMT_BGR555)
c->greenDither= ff_dither8[dstY&1];
else
c->greenDither= ff_dither4[dstY&1];
c->redDither= ff_dither8[(dstY+1)&1];
if (dstY < dstH - 2) {
const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
int i;
if (flags & SWS_ACCURATE_RND) {
int s= APCK_SIZE / 8;
for (i=0; i<vLumFilterSize; i+=2) {
*(const void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ];
*(const void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)];
lumMmxFilter[s*i+APCK_COEF/4 ]=
lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ]
+ (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
*(const void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ];
*(const void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)];
alpMmxFilter[s*i+APCK_COEF/4 ]=
alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4 ];
}
}
for (i=0; i<vChrFilterSize; i+=2) {
*(const void**)&chrMmxFilter[s*i ]= chrUSrcPtr[i ];
*(const void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrUSrcPtr[i+(vChrFilterSize>1)];
chrMmxFilter[s*i+APCK_COEF/4 ]=
chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i ]
+ (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
}
} else {
for (i=0; i<vLumFilterSize; i++) {
*(const void**)&lumMmxFilter[4*i+0]= lumSrcPtr[i];
lumMmxFilter[4*i+2]=
lumMmxFilter[4*i+3]=
((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
*(const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i];
alpMmxFilter[4*i+2]=
alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
}
}
for (i=0; i<vChrFilterSize; i++) {
*(const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i];
chrMmxFilter[4*i+2]=
chrMmxFilter[4*i+3]=
((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;
}
}
}
}
#endif /* !COMPILE_TEMPLATE_MMX2 */
static void RENAME(sws_init_swScale)(SwsContext *c)
{
enum PixelFormat srcFormat = c->srcFormat;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment