Commit 64fddb0a authored by jehan's avatar jehan

disable neon optimization if no neon detected by android

parent 219ac72b
......@@ -167,9 +167,11 @@ ifeq ($(strip $(BOARD_USES_ALSA_AUDIO)),true)
LOCAL_SHARED_LIBRARIES += libasound
endif
LOCAL_STATIC_LIBRARIES += cpufeatures
include $(BUILD_STATIC_LIBRARY)
$(call import-module,android/cpufeatures)
......@@ -190,6 +190,23 @@ MS_FILTER_DESC_EXPORT(ms_resample_desc)
#ifdef __ARM_NEON__
#include <arm_neon.h>
/*
 * Multiply two 16-bit values and return the int result.
 * Implemented with the ARM "smulbb" instruction through GCC extended inline
 * assembly, so it only assembles for ARM targets.
 * Constraint notes: "=&r" makes res an early-clobber output register; the "%"
 * on the first input tells the compiler that x and y may be interchanged.
 */
static inline int MULT16_16(short int x, short int y) {
int res;
asm ("smulbb %0,%1,%2;\n"
: "=&r"(res)
: "%r"(x),"r"(y));
return(res);
}
/*
 * Multiply a 16-bit value x by a 32-bit value y using the ARM "smulwb"
 * instruction (GCC extended inline assembly, ARM targets only).
 *
 * The instruction is fed (y<<1) as its 32-bit operand and x as its 16-bit
 * operand and keeps the upper 32 bits of the 48-bit product, i.e. the result
 * is ((y<<1) * x) >> 16, which equals (x * y) >> 15 as long as y<<1 does not
 * overflow an int.
 *
 * x: 16-bit multiplier.
 * y: 32-bit multiplicand.
 * Returns the scaled product as an int.
 */
static inline int MULT16_32_Q15(short int x, int y) {
	int res;
	/*
	 * The inputs must NOT be marked commutative ("%"): smulwb takes a full
	 * 32-bit word as its first source and only the bottom halfword of its
	 * second source, so letting the compiler swap them would compute a
	 * different value.
	 */
	asm ("smulwb %0,%1,%2;\n"
	     : "=&r"(res)
	     : "r"(y<<1),"r"(x));
	return(res);
}
/* Shift a right by shift bits using the plain C >> operator; both arguments are parenthesized. */
#define SHR32(a,shift) ((a) >> (shift))
inline float interpolate_product_single(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac) {
int i;
float32x4_t sum = vdupq_n_f32 (0);
......@@ -203,8 +220,33 @@ inline float interpolate_product_single(const float *a, const float *b, unsigned
float32x2_t tmp = vadd_f32(vget_low_f32(sum), vget_high_f32(sum));
return vget_lane_f32 (vpadd_f32(tmp,tmp),0);
}
/*
 * Cached result of the runtime NEON probe.
 * -1 means "not probed yet"; the ANDROID code paths below replace it on first
 * use with 1 (ARM CPU reporting NEON) or 0 (no NEON, use the scalar fallback).
 */
static int msresampler_as_neon=-1;
#ifdef ANDROID
#include "cpu-features.h"
#endif
inline int32_t interpolate_product_single_int(const int16_t *a, const int16_t *b, unsigned int len, const spx_uint32_t oversample, spx_int16_t *frac) {
#ifdef ANDROID
if (msresampler_as_neon == -1) {
msresampler_as_neon = (android_getCpuFamily() == ANDROID_CPU_FAMILY_ARM && (android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON) != 0);
ms_message("msresampler %s neon",msresampler_as_neon!=0?"enabling":"disabling");
}
if (!msresampler_as_neon) {
/*no neon*/
/*from speex resampler.c*/
int accum[4] = {0,0,0,0};
int j;
for(j=0;j<len;j++) {
const short int curr_in=a[j];
accum[0] += MULT16_16(curr_in,*(b + j*oversample) );
accum[1] += MULT16_16(curr_in,*((b + 1) + j*oversample));
accum[2] += MULT16_16(curr_in,*((b + 2) + j*oversample));
accum[3] += MULT16_16(curr_in,*((b + 3) + j*oversample));
}
return MULT16_32_Q15(frac[0],SHR32(accum[0], 1)) + MULT16_32_Q15(frac[1],SHR32(accum[1], 1)) + MULT16_32_Q15(frac[2],SHR32(accum[2], 1)) + MULT16_32_Q15(frac[3],SHR32(accum[3], 1));
}
#endif
int i,j;
int32x4_t sum = vdupq_n_s32 (0);
int16x4_t f=vld1_s16 ((const int16_t*)frac);
......@@ -225,7 +267,22 @@ inline int32_t interpolate_product_single_int(const int16_t *a, const int16_t *b
return vget_lane_s32 (tmp,0);
}
#ifdef ANDROID
extern int ff_scalarproduct_int16_neon(const int16_t* sinc,const int16_t* iptr,int N);
/*
 * Scalar product of two int16 vectors of N elements.
 *
 * On the first call (msresampler_as_neon == -1) the CPU is probed through the
 * Android cpufeatures API and the answer is cached in msresampler_as_neon and
 * logged. When NEON is available the work is delegated to
 * ff_scalarproduct_int16_neon(); otherwise a scalar multiply-accumulate loop
 * built on MULT16_16() is used.
 *
 * sinc: first input vector, at least N samples.
 * iptr: second input vector, at least N samples.
 * N:    number of samples to multiply and accumulate.
 * Returns the accumulated sum of products (0 on the scalar path when N <= 0).
 */
inline int msresampler_scalarproduct_int16(const int16_t* sinc,const int16_t* iptr,int N) {
	if (msresampler_as_neon == -1) {
		msresampler_as_neon = (android_getCpuFamily() == ANDROID_CPU_FAMILY_ARM && (android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON) != 0);
		ms_message("msresampler %s neon",msresampler_as_neon!=0?"enabling":"disabling");
	}
	if (!msresampler_as_neon) {
		/* The accumulator has to start at zero; leaving it uninitialized
		 * made the fallback return an indeterminate value. */
		int sum = 0;
		int j;
		for (j = 0; j < N; j++) {
			sum += MULT16_16(sinc[j], iptr[j]);
		}
		return sum;
	}
	return ff_scalarproduct_int16_neon(sinc,iptr,N);
}
#endif
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment