Commit 80ba1ddb authored by Justin Ruggles's avatar Justin Ruggles Committed by Mans Rullgard
Browse files

Remove unneeded add bias from 3 functions.



DSPContext.vector_fmul_window()
DCADSPContext.lfe_fir()
SynthFilterContext.synth_filter_float()
Signed-off-by: default avatarMans Rullgard <mans@mansr.com>
parent 8cb3c557
......@@ -1721,19 +1721,19 @@ static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
*/
if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
(ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
ac->dsp.vector_fmul_window( out, saved, buf, lwindow_prev, 0, 512);
ac->dsp.vector_fmul_window( out, saved, buf, lwindow_prev, 512);
} else {
memcpy( out, saved, 448 * sizeof(float));
if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448, buf + 0*128, swindow_prev, 0, 64);
ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow, 0, 64);
ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow, 0, 64);
ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow, 0, 64);
ac->dsp.vector_fmul_window(temp, buf + 3*128 + 64, buf + 4*128, swindow, 0, 64);
ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448, buf + 0*128, swindow_prev, 64);
ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow, 64);
ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow, 64);
ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow, 64);
ac->dsp.vector_fmul_window(temp, buf + 3*128 + 64, buf + 4*128, swindow, 64);
memcpy( out + 448 + 4*128, temp, 64 * sizeof(float));
} else {
ac->dsp.vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, 0, 64);
ac->dsp.vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, 64);
memcpy( out + 576, buf + 64, 448 * sizeof(float));
}
}
......@@ -1741,9 +1741,9 @@ static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
// buffer update
if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
memcpy( saved, temp + 64, 64 * sizeof(float));
ac->dsp.vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 0, 64);
ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 0, 64);
ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 0, 64);
ac->dsp.vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 64);
ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64);
ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64);
memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float));
} else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
memcpy( saved, buf + 512, 448 * sizeof(float));
......
......@@ -628,13 +628,13 @@ static inline void do_imdct(AC3DecodeContext *s, int channels)
for(i=0; i<128; i++)
x[i] = s->transform_coeffs[ch][2*i];
ff_imdct_half(&s->imdct_256, s->tmp_output, x);
s->dsp.vector_fmul_window(s->output[ch-1], s->delay[ch-1], s->tmp_output, s->window, 0, 128);
s->dsp.vector_fmul_window(s->output[ch-1], s->delay[ch-1], s->tmp_output, s->window, 128);
for(i=0; i<128; i++)
x[i] = s->transform_coeffs[ch][2*i+1];
ff_imdct_half(&s->imdct_256, s->delay[ch-1], x);
} else {
ff_imdct_half(&s->imdct_512, s->tmp_output, s->transform_coeffs[ch]);
s->dsp.vector_fmul_window(s->output[ch-1], s->delay[ch-1], s->tmp_output, s->window, 0, 128);
s->dsp.vector_fmul_window(s->output[ch-1], s->delay[ch-1], s->tmp_output, s->window, 128);
memcpy(s->delay[ch-1], s->tmp_output+128, 128*sizeof(float));
}
}
......
......@@ -23,7 +23,7 @@
#include "libavcodec/dcadsp.h"
void ff_dca_lfe_fir_neon(float *out, const float *in, const float *coefs,
int decifactor, float scale, float bias);
int decifactor, float scale);
void av_cold ff_dcadsp_init_arm(DCADSPContext *s)
{
......
......@@ -29,7 +29,7 @@ function ff_dca_lfe_fir_neon, export=1
cmp r3, #32
moveq r6, #256/32
movne r6, #256/64
NOVFP vldr d0, [sp, #16] @ scale, bias
NOVFP vldr s0, [sp, #16] @ scale
mov lr, #-16
1:
vmov.f32 q2, #0.0 @ v0
......@@ -51,8 +51,7 @@ NOVFP vldr d0, [sp, #16] @ scale, bias
vadd.f32 d4, d4, d5
vadd.f32 d6, d6, d7
vpadd.f32 d4, d4, d6
vdup.32 d5, d0[1]
vmla.f32 d5, d4, d0[0]
vmul.f32 d5, d4, d0[0]
vst1.32 {d5[0]}, [r0,:32]!
vst1.32 {d5[1]}, [r4,:32]!
bne 1b
......
......@@ -140,8 +140,7 @@ void ff_vp3_h_loop_filter_neon(uint8_t *, int, int *);
void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int len);
void ff_vector_fmul_window_neon(float *dst, const float *src0,
const float *src1, const float *win,
float add_bias, int len);
const float *src1, const float *win, int len);
void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
int len);
void ff_vector_fmul_sv_scalar_2_neon(float *dst, const float *src,
......
......@@ -777,11 +777,8 @@ function ff_vector_fmul_neon, export=1
endfunc
function ff_vector_fmul_window_neon, export=1
VFP vdup.32 q8, d0[0]
NOVFP vld1.32 {d16[],d17[]}, [sp,:32]
push {r4,r5,lr}
VFP ldr lr, [sp, #12]
NOVFP ldr lr, [sp, #16]
ldr lr, [sp, #12]
sub r2, r2, #8
sub r5, lr, #2
add r2, r2, r5, lsl #2
......@@ -793,14 +790,12 @@ NOVFP ldr lr, [sp, #16]
vld1.64 {d4,d5}, [r3,:128]!
vld1.64 {d6,d7}, [r4,:128], r5
1: subs lr, lr, #4
vmov q11, q8
vmla.f32 d22, d0, d4
vmov q10, q8
vmla.f32 d23, d1, d5
vmul.f32 d22, d0, d4
vrev64.32 q3, q3
vmla.f32 d20, d0, d7
vmul.f32 d23, d1, d5
vrev64.32 q1, q1
vmla.f32 d21, d1, d6
vmul.f32 d20, d0, d7
vmul.f32 d21, d1, d6
beq 2f
vmla.f32 d22, d3, d7
vld1.64 {d0,d1}, [r1,:128]!
......
......@@ -34,7 +34,7 @@ void ff_synth_filter_float_neon(FFTContext *imdct,
float *synth_buf_ptr, int *synth_buf_offset,
float synth_buf2[32], const float window[512],
float out[32], const float in[32],
float scale, float bias);
float scale);
av_cold void ff_fft_init_arm(FFTContext *s)
{
......
......@@ -42,7 +42,7 @@ VFP vpop {d0}
ldr r5, [sp, #9*4] @ window
ldr r2, [sp, #10*4] @ out
NOVFP vldr d0, [sp, #12*4] @ scale, bias
NOVFP vldr s0, [sp, #12*4] @ scale
add r8, r9, #12*4
mov lr, #64*4
......@@ -90,10 +90,8 @@ NOVFP vldr d0, [sp, #12*4] @ scale, bias
sub r11, r11, #512*4
b 2b
3:
vdup.32 q8, d0[1]
vdup.32 q9, d0[1]
vmla.f32 q8, q10, d0[0]
vmla.f32 q9, q1, d0[0]
vmul.f32 q8, q10, d0[0]
vmul.f32 q9, q1, d0[0]
vst1.32 {q3}, [r3,:128]
sub r3, r3, #16*4
vst1.32 {q2}, [r3,:128]
......
......@@ -141,7 +141,7 @@ static int at1_imdct_block(AT1SUCtx* su, AT1Ctx *q)
/* overlap and window */
q->dsp.vector_fmul_window(&q->bands[band_num][start_pos], prev_buf,
&su->spectrum[0][ref_pos + start_pos], ff_sine_32, 0, 16);
&su->spectrum[0][ref_pos + start_pos], ff_sine_32, 16);
prev_buf = &su->spectrum[0][ref_pos+start_pos + 16];
start_pos += block_size;
......
......@@ -896,7 +896,7 @@ static void qmf_32_subbands(DCAContext * s, int chans,
s->synth.synth_filter_float(&s->imdct,
s->subband_fir_hist[chans], &s->hist_index[chans],
s->subband_fir_noidea[chans], prCoeff,
samples_out, s->raXin, scale, 0);
samples_out, s->raXin, scale);
samples_out+= 32;
}
......@@ -929,7 +929,7 @@ static void lfe_interpolation_fir(DCAContext *s, int decimation_select,
/* Interpolation */
for (deciindex = 0; deciindex < num_deci_sample; deciindex++) {
s->dcadsp.lfe_fir(samples_out, samples_in, prCoeff, decifactor,
scale, 0);
scale);
samples_in++;
samples_out += 2 * decifactor;
}
......
......@@ -23,7 +23,7 @@
#include "dcadsp.h"
static void dca_lfe_fir_c(float *out, const float *in, const float *coefs,
int decifactor, float scale, float bias)
int decifactor, float scale)
{
float *out2 = out + decifactor;
const float *cf0 = coefs;
......@@ -39,8 +39,8 @@ static void dca_lfe_fir_c(float *out, const float *in, const float *coefs,
v0 += s * *cf0++;
v1 += s * *--cf1;
}
*out++ = (v0 * scale) + bias;
*out2++ = (v1 * scale) + bias;
*out++ = v0 * scale;
*out2++ = v1 * scale;
}
}
......
......@@ -21,7 +21,7 @@
typedef struct DCADSPContext {
void (*lfe_fir)(float *out, const float *in, const float *coefs,
int decifactor, float scale, float bias);
int decifactor, float scale);
} DCADSPContext;
void ff_dcadsp_init(DCADSPContext *s);
......
......@@ -3776,7 +3776,9 @@ static void vector_fmul_add_c(float *dst, const float *src0, const float *src1,
dst[i] = src0[i] * src1[i] + src2[i];
}
void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len){
static void vector_fmul_window_c(float *dst, const float *src0,
const float *src1, const float *win, int len)
{
int i,j;
dst += len;
win += len;
......@@ -3786,8 +3788,8 @@ void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, c
float s1 = src1[j];
float wi = win[i];
float wj = win[j];
dst[i] = s0*wj - s1*wi + add_bias;
dst[j] = s0*wi + s1*wj + add_bias;
dst[i] = s0*wj - s1*wi;
dst[j] = s0*wi + s1*wj;
}
}
......@@ -4434,7 +4436,7 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->vector_fmul = vector_fmul_c;
c->vector_fmul_reverse = vector_fmul_reverse_c;
c->vector_fmul_add = vector_fmul_add_c;
c->vector_fmul_window = ff_vector_fmul_window_c;
c->vector_fmul_window = vector_fmul_window_c;
c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c;
c->vector_clipf = vector_clipf_c;
c->float_to_int16 = ff_float_to_int16_c;
......
......@@ -68,9 +68,6 @@ void ff_h264_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qmul);
void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qp);
void ff_svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1,
const float *win, float add_bias, int len);
/* encoding scans */
extern const uint8_t ff_alternate_horizontal_scan[64];
extern const uint8_t ff_alternate_vertical_scan[64];
......@@ -393,7 +390,7 @@ typedef struct DSPContext {
/* assume len is a multiple of 8, and src arrays are 16-byte aligned */
void (*vector_fmul_add)(float *dst, const float *src0, const float *src1, const float *src2, int len);
/* assume len is a multiple of 4, and arrays are 16-byte aligned */
void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len);
void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, int len);
/* assume len is a multiple of 8, and arrays are 16-byte aligned */
void (*int32_to_float_fmul_scalar)(float *dst, const int *src, float mul, int len);
void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */);
......
......@@ -90,13 +90,9 @@ static void vector_fmul_add_altivec(float *dst, const float *src0,
}
}
static void vector_fmul_window_altivec(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len)
static void vector_fmul_window_altivec(float *dst, const float *src0, const float *src1, const float *win, int len)
{
union {
vector float v;
float s[4];
} vadd;
vector float vadd_bias, zero, t0, t1, s0, s1, wi, wj;
vector float zero, t0, t1, s0, s1, wi, wj;
const vector unsigned char reverse = vcprm(3,2,1,0);
int i,j;
......@@ -104,8 +100,6 @@ static void vector_fmul_window_altivec(float *dst, const float *src0, const floa
win += len;
src0+= len;
vadd.s[0] = add_bias;
vadd_bias = vec_splat(vadd.v, 0);
zero = (vector float)vec_splat_u32(0);
for(i=-len*4, j=len*4-16; i<0; i+=16, j-=16) {
......@@ -117,9 +111,9 @@ static void vector_fmul_window_altivec(float *dst, const float *src0, const floa
s1 = vec_perm(s1, s1, reverse);
wj = vec_perm(wj, wj, reverse);
t0 = vec_madd(s0, wj, vadd_bias);
t0 = vec_madd(s0, wj, zero);
t0 = vec_nmsub(s1, wi, t0);
t1 = vec_madd(s0, wi, vadd_bias);
t1 = vec_madd(s0, wi, zero);
t1 = vec_madd(s1, wj, t1);
t1 = vec_perm(t1, t1, reverse);
......
......@@ -24,7 +24,7 @@
static void synth_filter_float(FFTContext *imdct,
float *synth_buf_ptr, int *synth_buf_offset,
float synth_buf2[32], const float window[512],
float out[32], const float in[32], float scale, float bias)
float out[32], const float in[32], float scale)
{
float *synth_buf= synth_buf_ptr + *synth_buf_offset;
int i, j;
......@@ -48,8 +48,8 @@ static void synth_filter_float(FFTContext *imdct,
c += window[i + j + 32]*( synth_buf[16 + i + j - 512]);
d += window[i + j + 48]*( synth_buf[31 - i + j - 512]);
}
out[i ] = a*scale + bias;
out[i + 16] = b*scale + bias;
out[i ] = a*scale;
out[i + 16] = b*scale;
synth_buf2[i ] = c;
synth_buf2[i + 16] = d;
}
......
......@@ -28,7 +28,7 @@ typedef struct SynthFilterContext {
float *synth_buf_ptr, int *synth_buf_offset,
float synth_buf2[32], const float window[512],
float out[32], const float in[32],
float scale, float bias);
float scale);
} SynthFilterContext;
void ff_synth_filter_init(SynthFilterContext *c);
......
......@@ -646,7 +646,6 @@ static void imdct_and_window(TwinContext *tctx, enum FrameType ftype, int wtype,
prev_buf + (bsize-wsize)/2,
buf1 + bsize*j,
ff_sine_windows[av_log2(wsize)],
0.0,
wsize/2);
out2 += wsize;
......
......@@ -1575,13 +1575,13 @@ static int vorbis_parse_audio_packet(vorbis_context *vc)
const float *win = vc->win[blockflag & previous_window];
if (blockflag == previous_window) {
vc->dsp.vector_fmul_window(ret, saved, buf, win, 0, blocksize / 4);
vc->dsp.vector_fmul_window(ret, saved, buf, win, blocksize / 4);
} else if (blockflag > previous_window) {
vc->dsp.vector_fmul_window(ret, saved, buf, win, 0, bs0 / 4);
vc->dsp.vector_fmul_window(ret, saved, buf, win, bs0 / 4);
memcpy(ret+bs0/2, buf+bs0/4, ((bs1-bs0)/4) * sizeof(float));
} else {
memcpy(ret, saved, ((bs1 - bs0) / 4) * sizeof(float));
vc->dsp.vector_fmul_window(ret + (bs1 - bs0) / 4, saved + (bs1 - bs0) / 4, buf, win, 0, bs0 / 4);
vc->dsp.vector_fmul_window(ret + (bs1 - bs0) / 4, saved + (bs1 - bs0) / 4, buf, win, bs0 / 4);
}
memcpy(saved, buf + blocksize / 4, blocksize / 4 * sizeof(float));
}
......
......@@ -1031,7 +1031,7 @@ static void wmapro_window(WMAProDecodeCtx *s)
winlen >>= 1;
s->dsp.vector_fmul_window(start, start, start + winlen,
window, 0, winlen);
window, winlen);
s->channel[c].prev_block_len = s->subframe_len;
}
......
......@@ -2190,10 +2190,9 @@ static void vector_fmul_add_sse(float *dst, const float *src0, const float *src1
);
}
static void vector_fmul_window_3dnow2(float *dst, const float *src0, const float *src1,
const float *win, float add_bias, int len){
#if HAVE_6REGS
if(add_bias == 0){
static void vector_fmul_window_3dnow2(float *dst, const float *src0, const float *src1,
const float *win, int len){
x86_reg i = -len*4;
x86_reg j = len*4-8;
__asm__ volatile(
......@@ -2220,15 +2219,10 @@ static void vector_fmul_window_3dnow2(float *dst, const float *src0, const float
:"+r"(i), "+r"(j)
:"r"(dst+len), "r"(src0+len), "r"(src1), "r"(win+len)
);
}else
#endif
ff_vector_fmul_window_c(dst, src0, src1, win, add_bias, len);
}
static void vector_fmul_window_sse(float *dst, const float *src0, const float *src1,
const float *win, float add_bias, int len){
#if HAVE_6REGS
if(add_bias == 0){
const float *win, int len){
x86_reg i = -len*4;
x86_reg j = len*4-16;
__asm__ volatile(
......@@ -2256,10 +2250,8 @@ static void vector_fmul_window_sse(float *dst, const float *src0, const float *s
:"+r"(i), "+r"(j)
:"r"(dst+len), "r"(src0+len), "r"(src1), "r"(win+len)
);
}else
#endif
ff_vector_fmul_window_c(dst, src0, src1, win, add_bias, len);
}
#endif /* HAVE_6REGS */
static void int32_to_float_fmul_scalar_sse(float *dst, const int *src, float mul, int len)
{
......@@ -2882,7 +2874,9 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
}
if(mm_flags & AV_CPU_FLAG_3DNOWEXT){
c->vector_fmul_reverse = vector_fmul_reverse_3dnow2;
#if HAVE_6REGS
c->vector_fmul_window = vector_fmul_window_3dnow2;
#endif
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
c->float_to_int16_interleave = float_to_int16_interleave_3dn2;
}
......@@ -2899,7 +2893,9 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->vector_fmul = vector_fmul_sse;
c->vector_fmul_reverse = vector_fmul_reverse_sse;
c->vector_fmul_add = vector_fmul_add_sse;
#if HAVE_6REGS
c->vector_fmul_window = vector_fmul_window_sse;
#endif
c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse;
c->vector_clipf = vector_clipf_sse;
c->float_to_int16 = float_to_int16_sse;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment