Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
BC
public
external
ffmpeg
Commits
e034cc6c
Commit
e034cc6c
authored
Jan 06, 2013
by
Justin Ruggles
Committed by
Luca Barbato
Jan 16, 2013
Browse files
lavc: Move vector_fmul_window to AVFloatDSPContext
Signed-off-by:
Luca Barbato
<
lu_zero@gentoo.org
>
parent
a7ba3244
Changes
22
Hide whitespace changes
Inline
Side-by-side
libavcodec/aacdec.c
View file @
e034cc6c
...
...
@@ -2173,35 +2173,35 @@ static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
*/
if
((
ics
->
window_sequence
[
1
]
==
ONLY_LONG_SEQUENCE
||
ics
->
window_sequence
[
1
]
==
LONG_STOP_SEQUENCE
)
&&
(
ics
->
window_sequence
[
0
]
==
ONLY_LONG_SEQUENCE
||
ics
->
window_sequence
[
0
]
==
LONG_START_SEQUENCE
))
{
ac
->
dsp
.
vector_fmul_window
(
out
,
saved
,
buf
,
lwindow_prev
,
512
);
ac
->
f
dsp
.
vector_fmul_window
(
out
,
saved
,
buf
,
lwindow_prev
,
512
);
}
else
{
memcpy
(
out
,
saved
,
448
*
sizeof
(
float
));
memcpy
(
out
,
saved
,
448
*
sizeof
(
float
));
if
(
ics
->
window_sequence
[
0
]
==
EIGHT_SHORT_SEQUENCE
)
{
ac
->
dsp
.
vector_fmul_window
(
out
+
448
+
0
*
128
,
saved
+
448
,
buf
+
0
*
128
,
swindow_prev
,
64
);
ac
->
dsp
.
vector_fmul_window
(
out
+
448
+
1
*
128
,
buf
+
0
*
128
+
64
,
buf
+
1
*
128
,
swindow
,
64
);
ac
->
dsp
.
vector_fmul_window
(
out
+
448
+
2
*
128
,
buf
+
1
*
128
+
64
,
buf
+
2
*
128
,
swindow
,
64
);
ac
->
dsp
.
vector_fmul_window
(
out
+
448
+
3
*
128
,
buf
+
2
*
128
+
64
,
buf
+
3
*
128
,
swindow
,
64
);
ac
->
dsp
.
vector_fmul_window
(
temp
,
buf
+
3
*
128
+
64
,
buf
+
4
*
128
,
swindow
,
64
);
memcpy
(
out
+
448
+
4
*
128
,
temp
,
64
*
sizeof
(
float
));
ac
->
f
dsp
.
vector_fmul_window
(
out
+
448
+
0
*
128
,
saved
+
448
,
buf
+
0
*
128
,
swindow_prev
,
64
);
ac
->
f
dsp
.
vector_fmul_window
(
out
+
448
+
1
*
128
,
buf
+
0
*
128
+
64
,
buf
+
1
*
128
,
swindow
,
64
);
ac
->
f
dsp
.
vector_fmul_window
(
out
+
448
+
2
*
128
,
buf
+
1
*
128
+
64
,
buf
+
2
*
128
,
swindow
,
64
);
ac
->
f
dsp
.
vector_fmul_window
(
out
+
448
+
3
*
128
,
buf
+
2
*
128
+
64
,
buf
+
3
*
128
,
swindow
,
64
);
ac
->
f
dsp
.
vector_fmul_window
(
temp
,
buf
+
3
*
128
+
64
,
buf
+
4
*
128
,
swindow
,
64
);
memcpy
(
out
+
448
+
4
*
128
,
temp
,
64
*
sizeof
(
float
));
}
else
{
ac
->
dsp
.
vector_fmul_window
(
out
+
448
,
saved
+
448
,
buf
,
swindow_prev
,
64
);
memcpy
(
out
+
576
,
buf
+
64
,
448
*
sizeof
(
float
));
ac
->
f
dsp
.
vector_fmul_window
(
out
+
448
,
saved
+
448
,
buf
,
swindow_prev
,
64
);
memcpy
(
out
+
576
,
buf
+
64
,
448
*
sizeof
(
float
));
}
}
// buffer update
if
(
ics
->
window_sequence
[
0
]
==
EIGHT_SHORT_SEQUENCE
)
{
memcpy
(
saved
,
temp
+
64
,
64
*
sizeof
(
float
));
ac
->
dsp
.
vector_fmul_window
(
saved
+
64
,
buf
+
4
*
128
+
64
,
buf
+
5
*
128
,
swindow
,
64
);
ac
->
dsp
.
vector_fmul_window
(
saved
+
192
,
buf
+
5
*
128
+
64
,
buf
+
6
*
128
,
swindow
,
64
);
ac
->
dsp
.
vector_fmul_window
(
saved
+
320
,
buf
+
6
*
128
+
64
,
buf
+
7
*
128
,
swindow
,
64
);
memcpy
(
saved
+
448
,
buf
+
7
*
128
+
64
,
64
*
sizeof
(
float
));
memcpy
(
saved
,
temp
+
64
,
64
*
sizeof
(
float
));
ac
->
f
dsp
.
vector_fmul_window
(
saved
+
64
,
buf
+
4
*
128
+
64
,
buf
+
5
*
128
,
swindow
,
64
);
ac
->
f
dsp
.
vector_fmul_window
(
saved
+
192
,
buf
+
5
*
128
+
64
,
buf
+
6
*
128
,
swindow
,
64
);
ac
->
f
dsp
.
vector_fmul_window
(
saved
+
320
,
buf
+
6
*
128
+
64
,
buf
+
7
*
128
,
swindow
,
64
);
memcpy
(
saved
+
448
,
buf
+
7
*
128
+
64
,
64
*
sizeof
(
float
));
}
else
if
(
ics
->
window_sequence
[
0
]
==
LONG_START_SEQUENCE
)
{
memcpy
(
saved
,
buf
+
512
,
448
*
sizeof
(
float
));
memcpy
(
saved
+
448
,
buf
+
7
*
128
+
64
,
64
*
sizeof
(
float
));
memcpy
(
saved
,
buf
+
512
,
448
*
sizeof
(
float
));
memcpy
(
saved
+
448
,
buf
+
7
*
128
+
64
,
64
*
sizeof
(
float
));
}
else
{
// LONG_STOP or ONLY_LONG
memcpy
(
saved
,
buf
+
512
,
512
*
sizeof
(
float
));
memcpy
(
saved
,
buf
+
512
,
512
*
sizeof
(
float
));
}
}
...
...
libavcodec/ac3dec.c
View file @
e034cc6c
...
...
@@ -170,6 +170,7 @@ static av_cold int ac3_decode_init(AVCodecContext *avctx)
ff_mdct_init
(
&
s
->
imdct_512
,
9
,
1
,
1
.
0
);
ff_kbd_window_init
(
s
->
window
,
5
.
0
,
256
);
ff_dsputil_init
(
&
s
->
dsp
,
avctx
);
avpriv_float_dsp_init
(
&
s
->
fdsp
,
avctx
->
flags
&
CODEC_FLAG_BITEXACT
);
ff_ac3dsp_init
(
&
s
->
ac3dsp
,
avctx
->
flags
&
CODEC_FLAG_BITEXACT
);
ff_fmt_convert_init
(
&
s
->
fmt_conv
,
avctx
);
av_lfg_init
(
&
s
->
dith_state
,
0
);
...
...
@@ -606,15 +607,15 @@ static inline void do_imdct(AC3DecodeContext *s, int channels)
for
(
i
=
0
;
i
<
128
;
i
++
)
x
[
i
]
=
s
->
transform_coeffs
[
ch
][
2
*
i
];
s
->
imdct_256
.
imdct_half
(
&
s
->
imdct_256
,
s
->
tmp_output
,
x
);
s
->
dsp
.
vector_fmul_window
(
s
->
outptr
[
ch
-
1
],
s
->
delay
[
ch
-
1
],
s
->
tmp_output
,
s
->
window
,
128
);
s
->
f
dsp
.
vector_fmul_window
(
s
->
outptr
[
ch
-
1
],
s
->
delay
[
ch
-
1
],
s
->
tmp_output
,
s
->
window
,
128
);
for
(
i
=
0
;
i
<
128
;
i
++
)
x
[
i
]
=
s
->
transform_coeffs
[
ch
][
2
*
i
+
1
];
s
->
imdct_256
.
imdct_half
(
&
s
->
imdct_256
,
s
->
delay
[
ch
-
1
],
x
);
}
else
{
s
->
imdct_512
.
imdct_half
(
&
s
->
imdct_512
,
s
->
tmp_output
,
s
->
transform_coeffs
[
ch
]);
s
->
dsp
.
vector_fmul_window
(
s
->
outptr
[
ch
-
1
],
s
->
delay
[
ch
-
1
],
s
->
tmp_output
,
s
->
window
,
128
);
s
->
f
dsp
.
vector_fmul_window
(
s
->
outptr
[
ch
-
1
],
s
->
delay
[
ch
-
1
],
s
->
tmp_output
,
s
->
window
,
128
);
memcpy
(
s
->
delay
[
ch
-
1
],
s
->
tmp_output
+
128
,
128
*
sizeof
(
float
));
}
}
...
...
libavcodec/ac3dec.h
View file @
e034cc6c
...
...
@@ -50,6 +50,7 @@
#ifndef AVCODEC_AC3DEC_H
#define AVCODEC_AC3DEC_H
#include "libavutil/float_dsp.h"
#include "libavutil/lfg.h"
#include "ac3.h"
#include "ac3dsp.h"
...
...
@@ -193,6 +194,7 @@ typedef struct AC3DecodeContext {
///@name Optimization
DSPContext
dsp
;
///< for optimization
AVFloatDSPContext
fdsp
;
AC3DSPContext
ac3dsp
;
FmtConvertContext
fmt_conv
;
///< optimized conversion functions
///@}
...
...
libavcodec/arm/dsputil_init_neon.c
View file @
e034cc6c
...
...
@@ -142,8 +142,6 @@ void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
void
ff_avg_h264_chroma_mc4_neon
(
uint8_t
*
,
uint8_t
*
,
int
,
int
,
int
,
int
);
void
ff_avg_h264_chroma_mc2_neon
(
uint8_t
*
,
uint8_t
*
,
int
,
int
,
int
,
int
);
void
ff_vector_fmul_window_neon
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
int
len
);
void
ff_butterflies_float_neon
(
float
*
v1
,
float
*
v2
,
int
len
);
float
ff_scalarproduct_float_neon
(
const
float
*
v1
,
const
float
*
v2
,
int
len
);
void
ff_vector_fmul_reverse_neon
(
float
*
dst
,
const
float
*
src0
,
...
...
@@ -302,7 +300,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
c
->
avg_h264_qpel_pixels_tab
[
1
][
15
]
=
ff_avg_h264_qpel8_mc33_neon
;
}
c
->
vector_fmul_window
=
ff_vector_fmul_window_neon
;
c
->
butterflies_float
=
ff_butterflies_float_neon
;
c
->
scalarproduct_float
=
ff_scalarproduct_float_neon
;
c
->
vector_fmul_reverse
=
ff_vector_fmul_reverse_neon
;
...
...
libavcodec/arm/dsputil_neon.S
View file @
e034cc6c
...
...
@@ -532,53 +532,6 @@ function ff_add_pixels_clamped_neon, export=1
bx
lr
endfunc
function
ff_vector_fmul_window_neon
,
export
=
1
push
{
r4
,
r5
,
lr
}
ldr
lr
,
[
sp
,
#
12
]
sub
r2
,
r2
,
#
8
sub
r5
,
lr
,
#
2
add
r2
,
r2
,
r5
,
lsl
#
2
add
r4
,
r3
,
r5
,
lsl
#
3
add
ip
,
r0
,
r5
,
lsl
#
3
mov
r5
,
#-
16
vld1.32
{
d0
,
d1
},
[
r1
,:
128
]!
vld1.32
{
d2
,
d3
},
[
r2
,:
128
],
r5
vld1.32
{
d4
,
d5
},
[
r3
,:
128
]!
vld1.32
{
d6
,
d7
},
[
r4
,:
128
],
r5
1
:
subs
lr
,
lr
,
#
4
vmul.f32
d22
,
d0
,
d4
vrev64.32
q3
,
q3
vmul.f32
d23
,
d1
,
d5
vrev64.32
q1
,
q1
vmul.f32
d20
,
d0
,
d7
vmul.f32
d21
,
d1
,
d6
beq
2
f
vmla.f32
d22
,
d3
,
d7
vld1.32
{
d0
,
d1
},
[
r1
,:
128
]!
vmla.f32
d23
,
d2
,
d6
vld1.32
{
d18
,
d19
},[
r2
,:
128
],
r5
vmls.f32
d20
,
d3
,
d4
vld1.32
{
d24
,
d25
},[
r3
,:
128
]!
vmls.f32
d21
,
d2
,
d5
vld1.32
{
d6
,
d7
},
[
r4
,:
128
],
r5
vmov
q1
,
q9
vrev64.32
q11
,
q11
vmov
q2
,
q12
vswp
d22
,
d23
vst1.32
{
d20
,
d21
},[
r0
,:
128
]!
vst1.32
{
d22
,
d23
},[
ip
,:
128
],
r5
b
1
b
2
:
vmla.f32
d22
,
d3
,
d7
vmla.f32
d23
,
d2
,
d6
vmls.f32
d20
,
d3
,
d4
vmls.f32
d21
,
d2
,
d5
vrev64.32
q11
,
q11
vswp
d22
,
d23
vst1.32
{
d20
,
d21
},[
r0
,:
128
]!
vst1.32
{
d22
,
d23
},[
ip
,:
128
],
r5
pop
{
r4
,
r5
,
pc
}
endfunc
#if CONFIG_VORBIS_DECODER
function
ff_vorbis_inverse_coupling_neon
,
export
=
1
vmov.i32
q10
,
#
1
<<
31
...
...
libavcodec/atrac1.c
View file @
e034cc6c
...
...
@@ -32,6 +32,7 @@
#include <stddef.h>
#include <stdio.h>
#include "libavutil/float_dsp.h"
#include "avcodec.h"
#include "get_bits.h"
#include "dsputil.h"
...
...
@@ -81,7 +82,7 @@ typedef struct {
DECLARE_ALIGNED
(
32
,
float
,
high
)[
512
];
float
*
bands
[
3
];
FFTContext
mdct_ctx
[
3
];
DSPContext
dsp
;
AVFloat
DSPContext
f
dsp
;
}
AT1Ctx
;
/** size of the transform in samples in the long mode for each QMF band */
...
...
@@ -141,8 +142,8 @@ static int at1_imdct_block(AT1SUCtx* su, AT1Ctx *q)
at1_imdct
(
q
,
&
q
->
spec
[
pos
],
&
su
->
spectrum
[
0
][
ref_pos
+
start_pos
],
nbits
,
band_num
);
/* overlap and window */
q
->
dsp
.
vector_fmul_window
(
&
q
->
bands
[
band_num
][
start_pos
],
prev_buf
,
&
su
->
spectrum
[
0
][
ref_pos
+
start_pos
],
ff_sine_32
,
16
);
q
->
f
dsp
.
vector_fmul_window
(
&
q
->
bands
[
band_num
][
start_pos
],
prev_buf
,
&
su
->
spectrum
[
0
][
ref_pos
+
start_pos
],
ff_sine_32
,
16
);
prev_buf
=
&
su
->
spectrum
[
0
][
ref_pos
+
start_pos
+
16
];
start_pos
+=
block_size
;
...
...
@@ -357,7 +358,7 @@ static av_cold int atrac1_decode_init(AVCodecContext *avctx)
ff_atrac_generate_tables
();
ff_dsputil
_init
(
&
q
->
dsp
,
avctx
);
avpriv_float_dsp
_init
(
&
q
->
f
dsp
,
avctx
->
flags
&
CODEC_FLAG_BITEXACT
);
q
->
bands
[
0
]
=
q
->
low
;
q
->
bands
[
1
]
=
q
->
mid
;
...
...
libavcodec/dsputil.c
View file @
e034cc6c
...
...
@@ -2367,23 +2367,6 @@ static void vector_fmul_add_c(float *dst, const float *src0, const float *src1,
dst
[
i
]
=
src0
[
i
]
*
src1
[
i
]
+
src2
[
i
];
}
static
void
vector_fmul_window_c
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
int
len
)
{
int
i
,
j
;
dst
+=
len
;
win
+=
len
;
src0
+=
len
;
for
(
i
=-
len
,
j
=
len
-
1
;
i
<
0
;
i
++
,
j
--
)
{
float
s0
=
src0
[
i
];
float
s1
=
src1
[
j
];
float
wi
=
win
[
i
];
float
wj
=
win
[
j
];
dst
[
i
]
=
s0
*
wj
-
s1
*
wi
;
dst
[
j
]
=
s0
*
wi
+
s1
*
wj
;
}
}
static
void
butterflies_float_c
(
float
*
restrict
v1
,
float
*
restrict
v2
,
int
len
)
{
...
...
@@ -2839,7 +2822,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
#endif
c
->
vector_fmul_reverse
=
vector_fmul_reverse_c
;
c
->
vector_fmul_add
=
vector_fmul_add_c
;
c
->
vector_fmul_window
=
vector_fmul_window_c
;
c
->
vector_clipf
=
vector_clipf_c
;
c
->
scalarproduct_int16
=
scalarproduct_int16_c
;
c
->
scalarproduct_and_madd_int16
=
scalarproduct_and_madd_int16_c
;
...
...
libavcodec/dsputil.h
View file @
e034cc6c
...
...
@@ -352,8 +352,6 @@ typedef struct DSPContext {
void
(
*
vector_fmul_reverse
)(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
int
len
);
/* assume len is a multiple of 8, and src arrays are 16-byte aligned */
void
(
*
vector_fmul_add
)(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
);
/* assume len is a multiple of 4, and arrays are 16-byte aligned */
void
(
*
vector_fmul_window
)(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
int
len
);
/* assume len is a multiple of 8, and arrays are 16-byte aligned */
void
(
*
vector_clipf
)(
float
*
dst
/* align 16 */
,
const
float
*
src
/* align 16 */
,
float
min
,
float
max
,
int
len
/* align 16 */
);
/**
...
...
libavcodec/nellymoserdec.c
View file @
e034cc6c
...
...
@@ -32,6 +32,7 @@
*/
#include "libavutil/channel_layout.h"
#include "libavutil/float_dsp.h"
#include "libavutil/lfg.h"
#include "libavutil/random_seed.h"
#include "avcodec.h"
...
...
@@ -52,7 +53,7 @@ typedef struct NellyMoserDecodeContext {
AVLFG
random_state
;
GetBitContext
gb
;
float
scale_bias
;
DSPContext
dsp
;
AVFloat
DSPContext
f
dsp
;
FFTContext
imdct_ctx
;
DECLARE_ALIGNED
(
32
,
float
,
imdct_buf
)[
2
][
NELLY_BUF_LEN
];
float
*
imdct_out
;
...
...
@@ -107,7 +108,9 @@ static void nelly_decode_block(NellyMoserDecodeContext *s,
(
NELLY_BUF_LEN
-
NELLY_FILL_LEN
)
*
sizeof
(
float
));
s
->
imdct_ctx
.
imdct_half
(
&
s
->
imdct_ctx
,
s
->
imdct_out
,
aptr
);
s
->
dsp
.
vector_fmul_window
(
aptr
,
s
->
imdct_prev
+
NELLY_BUF_LEN
/
2
,
s
->
imdct_out
,
ff_sine_128
,
NELLY_BUF_LEN
/
2
);
s
->
fdsp
.
vector_fmul_window
(
aptr
,
s
->
imdct_prev
+
NELLY_BUF_LEN
/
2
,
s
->
imdct_out
,
ff_sine_128
,
NELLY_BUF_LEN
/
2
);
FFSWAP
(
float
*
,
s
->
imdct_out
,
s
->
imdct_prev
);
}
}
...
...
@@ -121,7 +124,7 @@ static av_cold int decode_init(AVCodecContext * avctx) {
av_lfg_init
(
&
s
->
random_state
,
0
);
ff_mdct_init
(
&
s
->
imdct_ctx
,
8
,
1
,
1
.
0
);
ff_dsputil
_init
(
&
s
->
dsp
,
avctx
);
avpriv_float_dsp
_init
(
&
s
->
f
dsp
,
avctx
->
flags
&
CODEC_FLAG_BITEXACT
);
s
->
scale_bias
=
1
.
0
/
(
32768
*
8
);
avctx
->
sample_fmt
=
AV_SAMPLE_FMT_FLT
;
...
...
libavcodec/ppc/float_altivec.c
View file @
e034cc6c
...
...
@@ -75,43 +75,8 @@ static void vector_fmul_add_altivec(float *dst, const float *src0,
}
}
static
void
vector_fmul_window_altivec
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
int
len
)
{
vector
float
zero
,
t0
,
t1
,
s0
,
s1
,
wi
,
wj
;
const
vector
unsigned
char
reverse
=
vcprm
(
3
,
2
,
1
,
0
);
int
i
,
j
;
dst
+=
len
;
win
+=
len
;
src0
+=
len
;
zero
=
(
vector
float
)
vec_splat_u32
(
0
);
for
(
i
=-
len
*
4
,
j
=
len
*
4
-
16
;
i
<
0
;
i
+=
16
,
j
-=
16
)
{
s0
=
vec_ld
(
i
,
src0
);
s1
=
vec_ld
(
j
,
src1
);
wi
=
vec_ld
(
i
,
win
);
wj
=
vec_ld
(
j
,
win
);
s1
=
vec_perm
(
s1
,
s1
,
reverse
);
wj
=
vec_perm
(
wj
,
wj
,
reverse
);
t0
=
vec_madd
(
s0
,
wj
,
zero
);
t0
=
vec_nmsub
(
s1
,
wi
,
t0
);
t1
=
vec_madd
(
s0
,
wi
,
zero
);
t1
=
vec_madd
(
s1
,
wj
,
t1
);
t1
=
vec_perm
(
t1
,
t1
,
reverse
);
vec_st
(
t0
,
i
,
dst
);
vec_st
(
t1
,
j
,
dst
);
}
}
void
ff_float_init_altivec
(
DSPContext
*
c
,
AVCodecContext
*
avctx
)
{
c
->
vector_fmul_reverse
=
vector_fmul_reverse_altivec
;
c
->
vector_fmul_add
=
vector_fmul_add_altivec
;
if
(
!
(
avctx
->
flags
&
CODEC_FLAG_BITEXACT
))
{
c
->
vector_fmul_window
=
vector_fmul_window_altivec
;
}
}
libavcodec/twinvq.c
View file @
e034cc6c
...
...
@@ -650,11 +650,10 @@ static void imdct_and_window(TwinContext *tctx, enum FrameType ftype, int wtype,
mdct
->
imdct_half
(
mdct
,
buf1
+
bsize
*
j
,
in
+
bsize
*
j
);
tctx
->
dsp
.
vector_fmul_window
(
out2
,
prev_buf
+
(
bsize
-
wsize
)
/
2
,
buf1
+
bsize
*
j
,
ff_sine_windows
[
av_log2
(
wsize
)],
wsize
/
2
);
tctx
->
fdsp
.
vector_fmul_window
(
out2
,
prev_buf
+
(
bsize
-
wsize
)
/
2
,
buf1
+
bsize
*
j
,
ff_sine_windows
[
av_log2
(
wsize
)],
wsize
/
2
);
out2
+=
wsize
;
memcpy
(
out2
,
buf1
+
bsize
*
j
+
wsize
/
2
,
(
bsize
-
wsize
/
2
)
*
sizeof
(
float
));
...
...
libavcodec/vorbisdec.c
View file @
e034cc6c
...
...
@@ -1620,13 +1620,13 @@ static int vorbis_parse_audio_packet(vorbis_context *vc, float **floor_ptr)
const
float
*
win
=
vc
->
win
[
blockflag
&
previous_window
];
if
(
blockflag
==
previous_window
)
{
vc
->
dsp
.
vector_fmul_window
(
ret
,
saved
,
buf
,
win
,
blocksize
/
4
);
vc
->
f
dsp
.
vector_fmul_window
(
ret
,
saved
,
buf
,
win
,
blocksize
/
4
);
}
else
if
(
blockflag
>
previous_window
)
{
vc
->
dsp
.
vector_fmul_window
(
ret
,
saved
,
buf
,
win
,
bs0
/
4
);
vc
->
f
dsp
.
vector_fmul_window
(
ret
,
saved
,
buf
,
win
,
bs0
/
4
);
memcpy
(
ret
+
bs0
/
2
,
buf
+
bs0
/
4
,
((
bs1
-
bs0
)
/
4
)
*
sizeof
(
float
));
}
else
{
memcpy
(
ret
,
saved
,
((
bs1
-
bs0
)
/
4
)
*
sizeof
(
float
));
vc
->
dsp
.
vector_fmul_window
(
ret
+
(
bs1
-
bs0
)
/
4
,
saved
+
(
bs1
-
bs0
)
/
4
,
buf
,
win
,
bs0
/
4
);
vc
->
f
dsp
.
vector_fmul_window
(
ret
+
(
bs1
-
bs0
)
/
4
,
saved
+
(
bs1
-
bs0
)
/
4
,
buf
,
win
,
bs0
/
4
);
}
memcpy
(
saved
,
buf
+
blocksize
/
4
,
blocksize
/
4
*
sizeof
(
float
));
}
...
...
libavcodec/wmaprodec.c
View file @
e034cc6c
...
...
@@ -1046,8 +1046,8 @@ static void wmapro_window(WMAProDecodeCtx *s)
winlen
>>=
1
;
s
->
dsp
.
vector_fmul_window
(
start
,
start
,
start
+
winlen
,
window
,
winlen
);
s
->
f
dsp
.
vector_fmul_window
(
start
,
start
,
start
+
winlen
,
window
,
winlen
);
s
->
channel
[
c
].
prev_block_len
=
s
->
subframe_len
;
}
...
...
libavcodec/x86/dsputil_mmx.c
View file @
e034cc6c
...
...
@@ -1892,72 +1892,6 @@ static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize)
}
}
#if HAVE_6REGS
static
void
vector_fmul_window_3dnowext
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
int
len
)
{
x86_reg
i
=
-
len
*
4
;
x86_reg
j
=
len
*
4
-
8
;
__asm__
volatile
(
"1:
\n
"
"pswapd (%5, %1), %%mm1
\n
"
"movq (%5, %0), %%mm0
\n
"
"pswapd (%4, %1), %%mm5
\n
"
"movq (%3, %0), %%mm4
\n
"
"movq %%mm0, %%mm2
\n
"
"movq %%mm1, %%mm3
\n
"
"pfmul %%mm4, %%mm2
\n
"
// src0[len + i] * win[len + i]
"pfmul %%mm5, %%mm3
\n
"
// src1[j] * win[len + j]
"pfmul %%mm4, %%mm1
\n
"
// src0[len + i] * win[len + j]
"pfmul %%mm5, %%mm0
\n
"
// src1[j] * win[len + i]
"pfadd %%mm3, %%mm2
\n
"
"pfsub %%mm0, %%mm1
\n
"
"pswapd %%mm2, %%mm2
\n
"
"movq %%mm1, (%2, %0)
\n
"
"movq %%mm2, (%2, %1)
\n
"
"sub $8, %1
\n
"
"add $8, %0
\n
"
"jl 1b
\n
"
"femms
\n
"
:
"+r"
(
i
),
"+r"
(
j
)
:
"r"
(
dst
+
len
),
"r"
(
src0
+
len
),
"r"
(
src1
),
"r"
(
win
+
len
)
);
}
static
void
vector_fmul_window_sse
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
int
len
)
{
x86_reg
i
=
-
len
*
4
;
x86_reg
j
=
len
*
4
-
16
;
__asm__
volatile
(
"1:
\n
"
"movaps (%5, %1), %%xmm1
\n
"
"movaps (%5, %0), %%xmm0
\n
"
"movaps (%4, %1), %%xmm5
\n
"
"movaps (%3, %0), %%xmm4
\n
"
"shufps $0x1b, %%xmm1, %%xmm1
\n
"
"shufps $0x1b, %%xmm5, %%xmm5
\n
"
"movaps %%xmm0, %%xmm2
\n
"
"movaps %%xmm1, %%xmm3
\n
"
"mulps %%xmm4, %%xmm2
\n
"
// src0[len + i] * win[len + i]
"mulps %%xmm5, %%xmm3
\n
"
// src1[j] * win[len + j]
"mulps %%xmm4, %%xmm1
\n
"
// src0[len + i] * win[len + j]
"mulps %%xmm5, %%xmm0
\n
"
// src1[j] * win[len + i]
"addps %%xmm3, %%xmm2
\n
"
"subps %%xmm0, %%xmm1
\n
"
"shufps $0x1b, %%xmm2, %%xmm2
\n
"
"movaps %%xmm1, (%2, %0)
\n
"
"movaps %%xmm2, (%2, %1)
\n
"
"sub $16, %1
\n
"
"add $16, %0
\n
"
"jl 1b
\n
"
:
"+r"
(
i
),
"+r"
(
j
)
:
"r"
(
dst
+
len
),
"r"
(
src0
+
len
),
"r"
(
src1
),
"r"
(
win
+
len
)
);
}
#endif
/* HAVE_6REGS */
static
void
vector_clipf_sse
(
float
*
dst
,
const
float
*
src
,
float
min
,
float
max
,
int
len
)
{
...
...
@@ -2320,14 +2254,6 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx,
#endif
/* HAVE_YASM */
}
static
void
dsputil_init_3dnowext
(
DSPContext
*
c
,
AVCodecContext
*
avctx
,
int
mm_flags
)
{
#if HAVE_AMD3DNOWEXT_INLINE && HAVE_6REGS
c
->
vector_fmul_window
=
vector_fmul_window_3dnowext
;
#endif
}
static
void
dsputil_init_sse
(
DSPContext
*
c
,
AVCodecContext
*
avctx
,
int
mm_flags
)
{
const
int
high_bit_depth
=
avctx
->
bits_per_raw_sample
>
8
;
...
...
@@ -2343,10 +2269,6 @@ static void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, int mm_flags)
c
->
vorbis_inverse_coupling
=
vorbis_inverse_coupling_sse
;
#if HAVE_6REGS
c
->
vector_fmul_window
=
vector_fmul_window_sse
;
#endif
c
->
vector_clipf
=
vector_clipf_sse
;
#endif
/* HAVE_INLINE_ASM */
...
...
@@ -2530,9 +2452,6 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
if
(
mm_flags
&
AV_CPU_FLAG_3DNOW
)
dsputil_init_3dnow
(
c
,
avctx
,
mm_flags
);
if
(
mm_flags
&
AV_CPU_FLAG_3DNOWEXT
)
dsputil_init_3dnowext
(
c
,
avctx
,
mm_flags
);
if
(
mm_flags
&
AV_CPU_FLAG_SSE
)
dsputil_init_sse
(
c
,
avctx
,
mm_flags
);
...
...
libavutil/arm/float_dsp_init_neon.c
View file @
e034cc6c
...
...
@@ -32,9 +32,13 @@ void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul,
void
ff_vector_fmul_scalar_neon
(
float
*
dst
,
const
float
*
src
,
float
mul
,
int
len
);
void
ff_vector_fmul_window_neon
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
int
len
);
void
ff_float_dsp_init_neon
(
AVFloatDSPContext
*
fdsp
)
{
fdsp
->
vector_fmul
=
ff_vector_fmul_neon
;
fdsp
->
vector_fmac_scalar
=
ff_vector_fmac_scalar_neon
;
fdsp
->
vector_fmul_scalar
=
ff_vector_fmul_scalar_neon
;
fdsp
->
vector_fmul_window
=
ff_vector_fmul_window_neon
;
}
libavutil/arm/float_dsp_neon.S
View file @
e034cc6c
...
...
@@ -146,3 +146,50 @@ NOVFP vdup.32 q8, r2
bx
lr
.
unreq
len
endfunc
function
ff_vector_fmul_window_neon
,
export
=
1
push
{
r4
,
r5
,
lr
}
ldr
lr
,
[
sp
,
#
12
]
sub
r2
,
r2
,
#
8
sub
r5
,
lr
,
#
2
add
r2
,
r2
,
r5
,
lsl
#
2
add
r4
,
r3
,
r5
,
lsl
#
3
add
ip
,
r0
,
r5
,
lsl
#
3
mov
r5
,
#-
16
vld1.32
{
d0
,
d1
},
[
r1
,:
128
]!
vld1.32
{
d2
,
d3
},
[
r2
,:
128
],
r5
vld1.32
{
d4
,
d5
},
[
r3
,:
128
]!
vld1.32
{
d6
,
d7
},
[
r4
,:
128
],
r5
1
:
subs
lr
,
lr
,
#
4
vmul.f32
d22
,
d0
,
d4
vrev64.32
q3
,
q3
vmul.f32
d23
,
d1
,
d5
vrev64.32
q1
,
q1
vmul.f32
d20
,
d0
,
d7
vmul.f32
d21
,
d1
,
d6
beq
2
f
vmla.f32
d22
,
d3
,
d7
vld1.32
{
d0
,
d1
},
[
r1
,:
128
]!
vmla.f32
d23
,
d2
,
d6
vld1.32
{
d18
,
d19
},[
r2
,:
128
],
r5
vmls.f32
d20
,
d3
,
d4
vld1.32
{
d24
,
d25
},[
r3
,:
128
]!
vmls.f32
d21
,
d2
,
d5
vld1.32
{
d6
,
d7
},
[
r4
,:
128
],
r5
vmov
q1
,
q9
vrev64.32
q11
,
q11
vmov
q2
,
q12
vswp
d22
,
d23
vst1.32
{
d20
,
d21
},[
r0
,:
128
]!
vst1.32
{
d22
,
d23
},[
ip
,:
128
],
r5
b
1
b
2
:
vmla.f32
d22
,
d3
,
d7
vmla.f32
d23
,
d2
,
d6
vmls.f32
d20
,
d3
,
d4
vmls.f32
d21
,
d2
,
d5
vrev64.32
q11
,
q11
vswp
d22
,
d23
vst1.32
{
d20
,
d21
},[
r0
,:
128
]!
vst1.32
{
d22
,
d23
},[
ip
,:
128
],
r5
pop
{
r4
,
r5
,
pc
}
endfunc
libavutil/float_dsp.c
View file @
e034cc6c
...
...
@@ -52,12 +52,32 @@ static void vector_dmul_scalar_c(double *dst, const double *src, double mul,
dst
[
i
]
=
src
[
i
]
*
mul
;
}
static
void
vector_fmul_window_c
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
int
len
)
{
int
i
,
j
;
dst
+=
len
;
win
+=
len
;
src0
+=
len
;
for
(
i
=
-
len
,
j
=
len
-
1
;
i
<
0
;
i
++
,
j
--
)
{
float
s0
=
src0
[
i
];
float
s1
=
src1
[
j
];
float
wi
=
win
[
i
];
float
wj
=
win
[
j
];
dst
[
i
]
=
s0
*
wj
-
s1
*
wi
;
dst
[
j
]
=
s0
*
wi
+
s1
*
wj
;
}
}
void
avpriv_float_dsp_init
(
AVFloatDSPContext
*
fdsp
,
int
bit_exact
)
{
fdsp
->
vector_fmul
=
vector_fmul_c
;
fdsp