Commit c73d99e6 authored by Justin Ruggles's avatar Justin Ruggles Committed by Mans Rullgard
Browse files

Separate format conversion DSP functions from DSPContext.



This will be beneficial for use with the audio conversion API without
requiring it to depend on all of dsputil.
Signed-off-by: default avatarMans Rullgard <mans@mansr.com>
parent 770c410f
......@@ -12,6 +12,7 @@ OBJS = allcodecs.o \
bitstream_filter.o \
dsputil.o \
faanidct.o \
fmtconvert.o \
imgconvert.o \
jrevdct.o \
opt.o \
......
......@@ -35,6 +35,7 @@
#include "fft.h"
#include "mpeg4audio.h"
#include "sbr.h"
#include "fmtconvert.h"
#include <stdint.h>
......@@ -268,6 +269,7 @@ typedef struct {
FFTContext mdct;
FFTContext mdct_small;
DSPContext dsp;
FmtConvertContext fmt_conv;
int random_state;
/** @} */
......
......@@ -85,6 +85,7 @@
#include "get_bits.h"
#include "dsputil.h"
#include "fft.h"
#include "fmtconvert.h"
#include "lpc.h"
#include "aac.h"
......@@ -562,6 +563,7 @@ static av_cold int aac_decode_init(AVCodecContext *avctx)
ff_aac_sbr_init();
dsputil_init(&ac->dsp, avctx);
ff_fmt_convert_init(&ac->fmt_conv, avctx);
ac->random_state = 0x1f2e3d4c;
......@@ -2032,7 +2034,7 @@ static int aac_decode_frame_int(AVCodecContext *avctx, void *data,
*data_size = data_size_tmp;
if (samples)
ac->dsp.float_to_int16_interleave(data, (const float **)ac->output_data, samples, avctx->channels);
ac->fmt_conv.float_to_int16_interleave(data, (const float **)ac->output_data, samples, avctx->channels);
if (ac->output_configured)
ac->output_configured = OC_LOCKED;
......
......@@ -193,6 +193,7 @@ static av_cold int ac3_decode_init(AVCodecContext *avctx)
ff_mdct_init(&s->imdct_512, 9, 1, 1.0);
ff_kbd_window_init(s->window, 5.0, 256);
dsputil_init(&s->dsp, avctx);
ff_fmt_convert_init(&s->fmt_conv, avctx);
av_lfg_init(&s->dith_state, 0);
/* set scale value for float to int16 conversion */
......@@ -1255,7 +1256,7 @@ static int decode_audio_block(AC3DecodeContext *s, int blk)
} else {
gain *= s->dynamic_range[0];
}
s->dsp.int32_to_float_fmul_scalar(s->transform_coeffs[ch], s->fixed_coeffs[ch], gain, 256);
s->fmt_conv.int32_to_float_fmul_scalar(s->transform_coeffs[ch], s->fixed_coeffs[ch], gain, 256);
}
/* apply spectral extension to high frequency bins */
......@@ -1407,7 +1408,7 @@ static int ac3_decode_frame(AVCodecContext * avctx, void *data, int *data_size,
av_log(avctx, AV_LOG_ERROR, "error decoding the audio block\n");
err = 1;
}
s->dsp.float_to_int16_interleave(out_samples, output, 256, s->out_channels);
s->fmt_conv.float_to_int16_interleave(out_samples, output, 256, s->out_channels);
out_samples += 256 * s->out_channels;
}
*data_size = s->num_blocks * 256 * avctx->channels * sizeof (int16_t);
......
......@@ -55,6 +55,7 @@
#include "get_bits.h"
#include "dsputil.h"
#include "fft.h"
#include "fmtconvert.h"
/* override ac3.h to include coupling channel */
#undef AC3_MAX_CHANNELS
......@@ -190,6 +191,7 @@ typedef struct {
///@defgroup opt optimization
DSPContext dsp; ///< for optimization
FmtConvertContext fmt_conv; ///< optimized conversion functions
float mul_bias; ///< scaling for float_to_int16 conversion
///@}
......
......@@ -9,6 +9,7 @@ OBJS-$(CONFIG_H264PRED) += arm/h264pred_init_arm.o
OBJS += arm/dsputil_init_arm.o \
arm/dsputil_arm.o \
arm/fft_init_arm.o \
arm/fmtconvert_init_arm.o \
arm/jrevdct_arm.o \
arm/mpegvideo_arm.o \
arm/simple_idct_arm.o \
......@@ -22,8 +23,11 @@ OBJS-$(HAVE_ARMV6) += arm/dsputil_init_armv6.o \
arm/dsputil_armv6.o \
arm/simple_idct_armv6.o \
VFP-OBJS-$(HAVE_ARMV6) += arm/fmtconvert_vfp.o \
OBJS-$(HAVE_ARMVFP) += arm/dsputil_vfp.o \
arm/dsputil_init_vfp.o \
$(VFP-OBJS-yes)
OBJS-$(HAVE_IWMMXT) += arm/dsputil_iwmmxt.o \
arm/mpegvideo_iwmmxt.o \
......@@ -52,6 +56,7 @@ NEON-OBJS-$(CONFIG_VP6_DECODER) += arm/vp56dsp_neon.o \
OBJS-$(HAVE_NEON) += arm/dsputil_init_neon.o \
arm/dsputil_neon.o \
arm/fmtconvert_neon.o \
arm/int_neon.o \
arm/mpegvideo_neon.o \
arm/simple_idct_neon.o \
......
......@@ -153,8 +153,6 @@ void ff_sv_fmul_scalar_4_neon(float *dst, const float **vp, float mul,
int len);
void ff_butterflies_float_neon(float *v1, float *v2, int len);
float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len);
void ff_int32_to_float_fmul_scalar_neon(float *dst, const int *src,
float mul, int len);
void ff_vector_fmul_reverse_neon(float *dst, const float *src0,
const float *src1, int len);
void ff_vector_fmul_add_neon(float *dst, const float *src0, const float *src1,
......@@ -162,8 +160,6 @@ void ff_vector_fmul_add_neon(float *dst, const float *src0, const float *src1,
void ff_vector_clipf_neon(float *dst, const float *src, float min, float max,
int len);
void ff_float_to_int16_neon(int16_t *, const float *, long);
void ff_float_to_int16_interleave_neon(int16_t *, const float **, long, int);
void ff_vorbis_inverse_coupling_neon(float *mag, float *ang, int blocksize);
......@@ -308,7 +304,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
c->vector_fmul_scalar = ff_vector_fmul_scalar_neon;
c->butterflies_float = ff_butterflies_float_neon;
c->scalarproduct_float = ff_scalarproduct_float_neon;
c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_neon;
c->vector_fmul_reverse = ff_vector_fmul_reverse_neon;
c->vector_fmul_add = ff_vector_fmul_add_neon;
c->vector_clipf = ff_vector_clipf_neon;
......@@ -319,11 +314,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
c->sv_fmul_scalar[0] = ff_sv_fmul_scalar_2_neon;
c->sv_fmul_scalar[1] = ff_sv_fmul_scalar_4_neon;
if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
c->float_to_int16 = ff_float_to_int16_neon;
c->float_to_int16_interleave = ff_float_to_int16_interleave_neon;
}
if (CONFIG_VORBIS_DECODER)
c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_neon;
......
......@@ -25,13 +25,9 @@ void ff_vector_fmul_vfp(float *dst, const float *src0,
const float *src1, int len);
void ff_vector_fmul_reverse_vfp(float *dst, const float *src0,
const float *src1, int len);
void ff_float_to_int16_vfp(int16_t *dst, const float *src, long len);
void ff_dsputil_init_vfp(DSPContext* c, AVCodecContext *avctx)
{
c->vector_fmul = ff_vector_fmul_vfp;
c->vector_fmul_reverse = ff_vector_fmul_reverse_vfp;
#if HAVE_ARMV6
c->float_to_int16 = ff_float_to_int16_vfp;
#endif
}
......@@ -400,343 +400,6 @@ function ff_add_pixels_clamped_neon, export=1
bx lr
endfunc
function ff_float_to_int16_neon, export=1
subs r2, r2, #8
vld1.64 {d0-d1}, [r1,:128]!
vcvt.s32.f32 q8, q0, #16
vld1.64 {d2-d3}, [r1,:128]!
vcvt.s32.f32 q9, q1, #16
beq 3f
bics ip, r2, #15
beq 2f
1: subs ip, ip, #16
vshrn.s32 d4, q8, #16
vld1.64 {d0-d1}, [r1,:128]!
vcvt.s32.f32 q0, q0, #16
vshrn.s32 d5, q9, #16
vld1.64 {d2-d3}, [r1,:128]!
vcvt.s32.f32 q1, q1, #16
vshrn.s32 d6, q0, #16
vst1.64 {d4-d5}, [r0,:128]!
vshrn.s32 d7, q1, #16
vld1.64 {d16-d17},[r1,:128]!
vcvt.s32.f32 q8, q8, #16
vld1.64 {d18-d19},[r1,:128]!
vcvt.s32.f32 q9, q9, #16
vst1.64 {d6-d7}, [r0,:128]!
bne 1b
ands r2, r2, #15
beq 3f
2: vld1.64 {d0-d1}, [r1,:128]!
vshrn.s32 d4, q8, #16
vcvt.s32.f32 q0, q0, #16
vld1.64 {d2-d3}, [r1,:128]!
vshrn.s32 d5, q9, #16
vcvt.s32.f32 q1, q1, #16
vshrn.s32 d6, q0, #16
vst1.64 {d4-d5}, [r0,:128]!
vshrn.s32 d7, q1, #16
vst1.64 {d6-d7}, [r0,:128]!
bx lr
3: vshrn.s32 d4, q8, #16
vshrn.s32 d5, q9, #16
vst1.64 {d4-d5}, [r0,:128]!
bx lr
endfunc
function ff_float_to_int16_interleave_neon, export=1
cmp r3, #2
ldrlt r1, [r1]
blt ff_float_to_int16_neon
bne 4f
ldr r3, [r1]
ldr r1, [r1, #4]
subs r2, r2, #8
vld1.64 {d0-d1}, [r3,:128]!
vcvt.s32.f32 q8, q0, #16
vld1.64 {d2-d3}, [r3,:128]!
vcvt.s32.f32 q9, q1, #16
vld1.64 {d20-d21},[r1,:128]!
vcvt.s32.f32 q10, q10, #16
vld1.64 {d22-d23},[r1,:128]!
vcvt.s32.f32 q11, q11, #16
beq 3f
bics ip, r2, #15
beq 2f
1: subs ip, ip, #16
vld1.64 {d0-d1}, [r3,:128]!
vcvt.s32.f32 q0, q0, #16
vsri.32 q10, q8, #16
vld1.64 {d2-d3}, [r3,:128]!
vcvt.s32.f32 q1, q1, #16
vld1.64 {d24-d25},[r1,:128]!
vcvt.s32.f32 q12, q12, #16
vld1.64 {d26-d27},[r1,:128]!
vsri.32 q11, q9, #16
vst1.64 {d20-d21},[r0,:128]!
vcvt.s32.f32 q13, q13, #16
vst1.64 {d22-d23},[r0,:128]!
vsri.32 q12, q0, #16
vld1.64 {d16-d17},[r3,:128]!
vsri.32 q13, q1, #16
vst1.64 {d24-d25},[r0,:128]!
vcvt.s32.f32 q8, q8, #16
vld1.64 {d18-d19},[r3,:128]!
vcvt.s32.f32 q9, q9, #16
vld1.64 {d20-d21},[r1,:128]!
vcvt.s32.f32 q10, q10, #16
vld1.64 {d22-d23},[r1,:128]!
vcvt.s32.f32 q11, q11, #16
vst1.64 {d26-d27},[r0,:128]!
bne 1b
ands r2, r2, #15
beq 3f
2: vsri.32 q10, q8, #16
vld1.64 {d0-d1}, [r3,:128]!
vcvt.s32.f32 q0, q0, #16
vld1.64 {d2-d3}, [r3,:128]!
vcvt.s32.f32 q1, q1, #16
vld1.64 {d24-d25},[r1,:128]!
vcvt.s32.f32 q12, q12, #16
vsri.32 q11, q9, #16
vld1.64 {d26-d27},[r1,:128]!
vcvt.s32.f32 q13, q13, #16
vst1.64 {d20-d21},[r0,:128]!
vsri.32 q12, q0, #16
vst1.64 {d22-d23},[r0,:128]!
vsri.32 q13, q1, #16
vst1.64 {d24-d27},[r0,:128]!
bx lr
3: vsri.32 q10, q8, #16
vsri.32 q11, q9, #16
vst1.64 {d20-d23},[r0,:128]!
bx lr
4: push {r4-r8,lr}
cmp r3, #4
lsl ip, r3, #1
blt 4f
@ 4 channels
5: ldmia r1!, {r4-r7}
mov lr, r2
mov r8, r0
vld1.64 {d16-d17},[r4,:128]!
vcvt.s32.f32 q8, q8, #16
vld1.64 {d18-d19},[r5,:128]!
vcvt.s32.f32 q9, q9, #16
vld1.64 {d20-d21},[r6,:128]!
vcvt.s32.f32 q10, q10, #16
vld1.64 {d22-d23},[r7,:128]!
vcvt.s32.f32 q11, q11, #16
6: subs lr, lr, #8
vld1.64 {d0-d1}, [r4,:128]!
vcvt.s32.f32 q0, q0, #16
vsri.32 q9, q8, #16
vld1.64 {d2-d3}, [r5,:128]!
vcvt.s32.f32 q1, q1, #16
vsri.32 q11, q10, #16
vld1.64 {d4-d5}, [r6,:128]!
vcvt.s32.f32 q2, q2, #16
vzip.32 d18, d22
vld1.64 {d6-d7}, [r7,:128]!
vcvt.s32.f32 q3, q3, #16
vzip.32 d19, d23
vst1.64 {d18}, [r8], ip
vsri.32 q1, q0, #16
vst1.64 {d22}, [r8], ip
vsri.32 q3, q2, #16
vst1.64 {d19}, [r8], ip
vzip.32 d2, d6
vst1.64 {d23}, [r8], ip
vzip.32 d3, d7
beq 7f
vld1.64 {d16-d17},[r4,:128]!
vcvt.s32.f32 q8, q8, #16
vst1.64 {d2}, [r8], ip
vld1.64 {d18-d19},[r5,:128]!
vcvt.s32.f32 q9, q9, #16
vst1.64 {d6}, [r8], ip
vld1.64 {d20-d21},[r6,:128]!
vcvt.s32.f32 q10, q10, #16
vst1.64 {d3}, [r8], ip
vld1.64 {d22-d23},[r7,:128]!
vcvt.s32.f32 q11, q11, #16
vst1.64 {d7}, [r8], ip
b 6b
7: vst1.64 {d2}, [r8], ip
vst1.64 {d6}, [r8], ip
vst1.64 {d3}, [r8], ip
vst1.64 {d7}, [r8], ip
subs r3, r3, #4
popeq {r4-r8,pc}
cmp r3, #4
add r0, r0, #8
bge 5b
@ 2 channels
4: cmp r3, #2
blt 4f
ldmia r1!, {r4-r5}
mov lr, r2
mov r8, r0
tst lr, #8
vld1.64 {d16-d17},[r4,:128]!
vcvt.s32.f32 q8, q8, #16
vld1.64 {d18-d19},[r5,:128]!
vcvt.s32.f32 q9, q9, #16
vld1.64 {d20-d21},[r4,:128]!
vcvt.s32.f32 q10, q10, #16
vld1.64 {d22-d23},[r5,:128]!
vcvt.s32.f32 q11, q11, #16
beq 6f
subs lr, lr, #8
beq 7f
vsri.32 d18, d16, #16
vsri.32 d19, d17, #16
vld1.64 {d16-d17},[r4,:128]!
vcvt.s32.f32 q8, q8, #16
vst1.32 {d18[0]}, [r8], ip
vsri.32 d22, d20, #16
vst1.32 {d18[1]}, [r8], ip
vsri.32 d23, d21, #16
vst1.32 {d19[0]}, [r8], ip
vst1.32 {d19[1]}, [r8], ip
vld1.64 {d18-d19},[r5,:128]!
vcvt.s32.f32 q9, q9, #16
vst1.32 {d22[0]}, [r8], ip
vst1.32 {d22[1]}, [r8], ip
vld1.64 {d20-d21},[r4,:128]!
vcvt.s32.f32 q10, q10, #16
vst1.32 {d23[0]}, [r8], ip
vst1.32 {d23[1]}, [r8], ip
vld1.64 {d22-d23},[r5,:128]!
vcvt.s32.f32 q11, q11, #16
6: subs lr, lr, #16
vld1.64 {d0-d1}, [r4,:128]!
vcvt.s32.f32 q0, q0, #16
vsri.32 d18, d16, #16
vld1.64 {d2-d3}, [r5,:128]!
vcvt.s32.f32 q1, q1, #16
vsri.32 d19, d17, #16
vld1.64 {d4-d5}, [r4,:128]!
vcvt.s32.f32 q2, q2, #16
vld1.64 {d6-d7}, [r5,:128]!
vcvt.s32.f32 q3, q3, #16
vst1.32 {d18[0]}, [r8], ip
vsri.32 d22, d20, #16
vst1.32 {d18[1]}, [r8], ip
vsri.32 d23, d21, #16
vst1.32 {d19[0]}, [r8], ip
vsri.32 d2, d0, #16
vst1.32 {d19[1]}, [r8], ip
vsri.32 d3, d1, #16
vst1.32 {d22[0]}, [r8], ip
vsri.32 d6, d4, #16
vst1.32 {d22[1]}, [r8], ip
vsri.32 d7, d5, #16
vst1.32 {d23[0]}, [r8], ip
vst1.32 {d23[1]}, [r8], ip
beq 6f
vld1.64 {d16-d17},[r4,:128]!
vcvt.s32.f32 q8, q8, #16
vst1.32 {d2[0]}, [r8], ip
vst1.32 {d2[1]}, [r8], ip
vld1.64 {d18-d19},[r5,:128]!
vcvt.s32.f32 q9, q9, #16
vst1.32 {d3[0]}, [r8], ip
vst1.32 {d3[1]}, [r8], ip
vld1.64 {d20-d21},[r4,:128]!
vcvt.s32.f32 q10, q10, #16
vst1.32 {d6[0]}, [r8], ip
vst1.32 {d6[1]}, [r8], ip
vld1.64 {d22-d23},[r5,:128]!
vcvt.s32.f32 q11, q11, #16
vst1.32 {d7[0]}, [r8], ip
vst1.32 {d7[1]}, [r8], ip
bgt 6b
6: vst1.32 {d2[0]}, [r8], ip
vst1.32 {d2[1]}, [r8], ip
vst1.32 {d3[0]}, [r8], ip
vst1.32 {d3[1]}, [r8], ip
vst1.32 {d6[0]}, [r8], ip
vst1.32 {d6[1]}, [r8], ip
vst1.32 {d7[0]}, [r8], ip
vst1.32 {d7[1]}, [r8], ip
b 8f
7: vsri.32 d18, d16, #16
vsri.32 d19, d17, #16
vst1.32 {d18[0]}, [r8], ip
vsri.32 d22, d20, #16
vst1.32 {d18[1]}, [r8], ip
vsri.32 d23, d21, #16
vst1.32 {d19[0]}, [r8], ip
vst1.32 {d19[1]}, [r8], ip
vst1.32 {d22[0]}, [r8], ip
vst1.32 {d22[1]}, [r8], ip
vst1.32 {d23[0]}, [r8], ip
vst1.32 {d23[1]}, [r8], ip
8: subs r3, r3, #2
add r0, r0, #4
popeq {r4-r8,pc}
@ 1 channel
4: ldr r4, [r1],#4
tst r2, #8
mov lr, r2
mov r5, r0
vld1.64 {d0-d1}, [r4,:128]!
vcvt.s32.f32 q0, q0, #16
vld1.64 {d2-d3}, [r4,:128]!
vcvt.s32.f32 q1, q1, #16
bne 8f
6: subs lr, lr, #16
vld1.64 {d4-d5}, [r4,:128]!
vcvt.s32.f32 q2, q2, #16
vld1.64 {d6-d7}, [r4,:128]!
vcvt.s32.f32 q3, q3, #16
vst1.16 {d0[1]}, [r5,:16], ip
vst1.16 {d0[3]}, [r5,:16], ip
vst1.16 {d1[1]}, [r5,:16], ip
vst1.16 {d1[3]}, [r5,:16], ip
vst1.16 {d2[1]}, [r5,:16], ip
vst1.16 {d2[3]}, [r5,:16], ip
vst1.16 {d3[1]}, [r5,:16], ip
vst1.16 {d3[3]}, [r5,:16], ip
beq 7f
vld1.64 {d0-d1}, [r4,:128]!
vcvt.s32.f32 q0, q0, #16
vld1.64 {d2-d3}, [r4,:128]!
vcvt.s32.f32 q1, q1, #16
7: vst1.16 {d4[1]}, [r5,:16], ip
vst1.16 {d4[3]}, [r5,:16], ip
vst1.16 {d5[1]}, [r5,:16], ip
vst1.16 {d5[3]}, [r5,:16], ip
vst1.16 {d6[1]}, [r5,:16], ip
vst1.16 {d6[3]}, [r5,:16], ip
vst1.16 {d7[1]}, [r5,:16], ip
vst1.16 {d7[3]}, [r5,:16], ip
bgt 6b
pop {r4-r8,pc}
8: subs lr, lr, #8
vst1.16 {d0[1]}, [r5,:16], ip
vst1.16 {d0[3]}, [r5,:16], ip
vst1.16 {d1[1]}, [r5,:16], ip
vst1.16 {d1[3]}, [r5,:16], ip
vst1.16 {d2[1]}, [r5,:16], ip
vst1.16 {d2[3]}, [r5,:16], ip
vst1.16 {d3[1]}, [r5,:16], ip
vst1.16 {d3[3]}, [r5,:16], ip
popeq {r4-r8,pc}
vld1.64 {d0-d1}, [r4,:128]!
vcvt.s32.f32 q0, q0, #16
vld1.64 {d2-d3}, [r4,:128]!
vcvt.s32.f32 q1, q1, #16
b 6b
endfunc
function ff_vector_fmul_neon, export=1
subs r3, r3, #8
vld1.64 {d0-d3}, [r1,:128]!
......@@ -1050,34 +713,6 @@ NOVFP vmov.32 r0, d0[0]
bx lr
endfunc
function ff_int32_to_float_fmul_scalar_neon, export=1
VFP vdup.32 q0, d0[0]
VFP len .req r2
NOVFP vdup.32 q0, r2
NOVFP len .req r3
vld1.32 {q1},[r1,:128]!
vcvt.f32.s32 q3, q1
vld1.32 {q2},[r1,:128]!
vcvt.f32.s32 q8, q2
1: subs len, len, #8
pld [r1, #16]
vmul.f32 q9, q3, q0
vmul.f32 q10, q8, q0
beq 2f
vld1.32 {q1},[r1,:128]!
vcvt.f32.s32 q3, q1
vld1.32 {q2},[r1,:128]!
vcvt.f32.s32 q8, q2
vst1.32 {q9}, [r0,:128]!
vst1.32 {q10},[r0,:128]!
b 1b
2: vst1.32 {q9}, [r0,:128]!
vst1.32 {q10},[r0,:128]!
bx lr
.unreq len
endfunc
function ff_vector_fmul_reverse_neon, export=1
add r2, r2, r3, lsl #2
sub r2, r2, #32
......
......@@ -131,58 +131,3 @@ function ff_vector_fmul_reverse_vfp, export=1
vpop {d8-d15}
bx lr
endfunc
#if HAVE_ARMV6
/**
* ARM VFP optimized float to int16 conversion.
* Assume that len is a positive number and is multiple of 8, destination
* buffer is at least 4 bytes aligned (8 bytes alignment is better for
* performance), little endian byte sex
*/
@ void ff_float_to_int16_vfp(int16_t *dst, const float *src, int len)
function ff_float_to_int16_vfp, export=1
push {r4-r8,lr}
vpush {d8-d11}
vldmia r1!, {s16-s23}
vcvt.s32.f32 s0, s16
vcvt.s32.f32 s1, s17
vcvt.s32.f32 s2, s18
vcvt.s32.f32 s3, s19
vcvt.s32.f32 s4, s20
vcvt.s32.f32 s5, s21
vcvt.s32.f32 s6, s22
vcvt.s32.f32 s7, s23
1:
subs r2, r2, #8
vmov r3, r4, s0, s1