Commit 2dac4acf authored by Loren Merritt
Browse files

sse & sse2 implementations of vorbis channel coupling.

9% faster vorbis (on a K8).

Originally committed as revision 5898 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent 7bf00496
......@@ -35,6 +35,9 @@
/* snow.c */
void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count);
/* vorbis.c */
void vorbis_inverse_coupling(float *mag, float *ang, int blocksize);
uint8_t cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
uint32_t squareTbl[512] = {0, };
......@@ -4090,6 +4093,10 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->inner_add_yblock = ff_snow_inner_add_yblock;
#endif
#ifdef CONFIG_VORBIS_DECODER
c->vorbis_inverse_coupling = vorbis_inverse_coupling;
#endif
c->shrink[0]= ff_img_copy_plane;
c->shrink[1]= ff_shrink22;
c->shrink[2]= ff_shrink44;
......
......@@ -307,6 +307,8 @@ typedef struct DSPContext {
void (*h261_loop_filter)(uint8_t *src, int stride);
void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize);
/* (I)DCT */
void (*fdct)(DCTELEM *block/* align 16*/);
void (*fdct248)(DCTELEM *block/* align 16*/);
......
......@@ -2711,6 +2711,59 @@ static void ff_idct_xvid_mmx2_add(uint8_t *dest, int line_size, DCTELEM *block)
}
#endif
/*
 * In-place Vorbis inverse channel coupling on mag[] / ang[], written with
 * MMX registers and processing two floats per loop iteration (each movq
 * moves 8 bytes, and i advances by 2).
 *
 * mag, ang:  arrays of blocksize floats, both read and overwritten.
 * blocksize: number of elements; the loop always handles pairs, so this
 *            assumes blocksize is a multiple of 2.
 *
 * NOTE(review): despite the _sse suffix, the floating-point instructions
 * used here (pfcmpge, pfadd, pfsub) are AMD 3DNow! instructions, not SSE.
 * Confirm that the CPU-flag test which installs this function checks for
 * 3DNow! support rather than SSE.
 *
 * NOTE(review): the masks below are built with ">= 0.0", whereas the C
 * version vorbis_inverse_coupling() branches on "> 0.0". The two appear to
 * disagree in sign when a mag element is exactly zero and the matching ang
 * element is nonzero -- confirm whether that case matters.
 */
static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize)
{
int i;
// mm7 = 0.0 in both lanes; used as the comparison constant inside the loop
asm volatile("pxor %%mm7, %%mm7":);
for(i=0; i<blocksize; i+=2) {
asm volatile(
"movq %0, %%mm0 \n\t" // mm0 = m (mag[i], mag[i+1])
"movq %1, %%mm1 \n\t" // mm1 = a (ang[i], ang[i+1])
"movq %%mm0, %%mm2 \n\t"
"movq %%mm1, %%mm3 \n\t"
"pfcmpge %%mm7, %%mm2 \n\t" // mm2 = (m >= 0.0) ? all ones : 0
"pfcmpge %%mm7, %%mm3 \n\t" // mm3 = (a >= 0.0) ? all ones : 0
"pslld $31, %%mm2 \n\t" // keep only the sign bit: 0x80000000 where m >= 0.0
"pxor %%mm2, %%mm1 \n\t" // a' = a with its sign flipped where m >= 0.0
"movq %%mm3, %%mm4 \n\t"
"pand %%mm1, %%mm3 \n\t" // mm3 = (a >= 0.0) ? a' : 0
"pandn %%mm1, %%mm4 \n\t" // mm4 = (a >= 0.0) ? 0 : a'
"pfadd %%mm0, %%mm3 \n\t" // new ang = m + ((a >= 0.0) ? a' : 0)
"pfsub %%mm4, %%mm0 \n\t" // new mag = m - ((a >= 0.0) ? 0 : a')
"movq %%mm3, %1 \n\t"
"movq %%mm0, %0 \n\t"
:"+m"(mag[i]), "+m"(ang[i])
::"memory" // the asm touches element i+1 too, which the operands do not name
);
}
// leave MMX state so that later x87 floating-point code works
asm volatile("emms");
}
/*
 * In-place Vorbis inverse channel coupling on mag[] / ang[] using XMM
 * registers, four floats per loop iteration.
 *
 * mag, ang:  arrays of blocksize floats, both read and overwritten. They
 *            are accessed with movaps, which requires 16-byte aligned
 *            addresses.
 * blocksize: number of elements; the loop always handles groups of four,
 *            so this assumes blocksize is a multiple of 4.
 *
 * The float compares and arithmetic (cmpleps, addps, subps) are SSE; the
 * integer operations on XMM registers (pxor, pslld, pand, pandn) are what
 * make this an SSE2 routine.
 *
 * NOTE(review): the masks below are built with "0.0 <= x", whereas the C
 * version vorbis_inverse_coupling() branches on "> 0.0". The two appear to
 * disagree in sign when a mag element is exactly zero and the matching ang
 * element is nonzero -- confirm whether that case matters.
 */
static void vorbis_inverse_coupling_sse2(float *mag, float *ang, int blocksize)
{
int i;
for(i=0; i<blocksize; i+=4) {
asm volatile(
"movaps %0, %%xmm0 \n\t" // xmm0 = m (mag[i..i+3])
"movaps %1, %%xmm1 \n\t" // xmm1 = a (ang[i..i+3])
"pxor %%xmm2, %%xmm2 \n\t" // xmm2 = 0.0
"pxor %%xmm3, %%xmm3 \n\t" // xmm3 = 0.0
"cmpleps %%xmm0, %%xmm2 \n\t" // xmm2 = (0.0 <= m) ? all ones : 0
"cmpleps %%xmm1, %%xmm3 \n\t" // xmm3 = (0.0 <= a) ? all ones : 0
"pslld $31, %%xmm2 \n\t" // keep only the sign bit: 0x80000000 where 0.0 <= m
"pxor %%xmm2, %%xmm1 \n\t" // a' = a with its sign flipped where 0.0 <= m
"movaps %%xmm3, %%xmm4 \n\t"
"pand %%xmm1, %%xmm3 \n\t" // xmm3 = (0.0 <= a) ? a' : 0
"pandn %%xmm1, %%xmm4 \n\t" // xmm4 = (0.0 <= a) ? 0 : a'
"addps %%xmm0, %%xmm3 \n\t" // new ang = m + ((0.0 <= a) ? a' : 0)
"subps %%xmm4, %%xmm0 \n\t" // new mag = m - ((0.0 <= a) ? 0 : a')
"movaps %%xmm3, %1 \n\t"
"movaps %%xmm0, %0 \n\t"
:"+m"(mag[i]), "+m"(ang[i])
::"memory" // the asm touches elements i+1..i+3 too, which the operands do not name
);
}
}
#ifdef CONFIG_SNOW_ENCODER
extern void ff_snow_horizontal_compose97i_sse2(DWTELEM *b, int width);
extern void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width);
......@@ -3137,6 +3190,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->inner_add_yblock = ff_snow_inner_add_yblock_mmx;
}
#endif
if(mm_flags & MM_SSE2)
c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse2;
else if(mm_flags & MM_SSE)
c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse;
}
#ifdef CONFIG_ENCODERS
......
......@@ -929,6 +929,7 @@ static int vorbis_decode_init(AVCodecContext *avccontext) {
int i, j, hdr_type;
vc->avccontext = avccontext;
dsputil_init(&vc->dsp, avccontext);
if (!headers_len) {
av_log(avccontext, AV_LOG_ERROR, "Extradata corrupt.\n");
......@@ -1443,6 +1444,31 @@ static int vorbis_residue_decode(vorbis_context *vc, vorbis_residue *vr, uint_fa
return 0;
}
/**
 * Undo Vorbis channel coupling in place (plain C version).
 *
 * For every index below blocksize the (mag, ang) pair is rewritten according
 * to the signs of the two inputs m = mag[k] and a = ang[k]:
 *
 *   m > 0,  a > 0 :  ang = m - a,  mag unchanged
 *   m > 0,  a <= 0:  ang = m,      mag = m + a
 *   m <= 0, a > 0 :  ang = a + m,  mag unchanged
 *   m <= 0, a <= 0:  ang = m,      mag = m - a
 *
 * @param mag       magnitude vector, read and updated
 * @param ang       angle vector, read and updated
 * @param blocksize number of elements to process; nothing is touched when
 *                  it is zero or negative
 */
void vorbis_inverse_coupling(float *mag, float *ang, int blocksize)
{
    int k = 0;

    while (k < blocksize) {
        const float m = mag[k];
        const float a = ang[k];
        const int m_positive = m > 0.0;

        if (a > 0.0) {
            /* Positive angle: only the angle slot changes. */
            ang[k] = m_positive ? m - a : a + m;
        } else {
            /* Non-positive angle: the magnitude moves into the angle slot
             * and the magnitude itself is adjusted by the old angle. */
            ang[k] = m;
            mag[k] = m_positive ? m + a : m - a;
        }
        k++;
    }
}
// Decode the audio packet using the functions above
#define BIAS 385
......@@ -1541,26 +1567,7 @@ static int vorbis_parse_audio_packet(vorbis_context *vc) {
mag=vc->channel_residues+res_chan[mapping->magnitude[i]]*blocksize/2;
ang=vc->channel_residues+res_chan[mapping->angle[i]]*blocksize/2;
for(j=0;j<blocksize/2;++j) {
float temp;
if (mag[j]>0.0) {
if (ang[j]>0.0) {
ang[j]=mag[j]-ang[j];
} else {
temp=ang[j];
ang[j]=mag[j];
mag[j]+=temp;
}
} else {
if (ang[j]>0.0) {
ang[j]+=mag[j];
} else {
temp=ang[j];
ang[j]=mag[j];
mag[j]-=temp;
}
}
}
vc->dsp.vorbis_inverse_coupling(mag, ang, blocksize/2);
}
// Dotproduct
......
......@@ -87,6 +87,7 @@ typedef struct {
typedef struct vorbis_context_s {
AVCodecContext *avccontext;
GetBitContext gb;
DSPContext dsp;
MDCTContext mdct0;
MDCTContext mdct1;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment