Commit d3f5b947 authored by Janne Grunau's avatar Janne Grunau
Browse files

aarch64: opus NEON iMDCT and FFT

Opus celt decoding 11% faster and the iMDCT over 2.5 times faster on
Apple's A7.
parent 7c5ca546
......@@ -7,6 +7,7 @@ OBJS-$(CONFIG_MPEGAUDIODSP) += aarch64/mpegaudiodsp_init.o
OBJS-$(CONFIG_NEON_CLOBBER_TEST) += aarch64/neontest.o
OBJS-$(CONFIG_VIDEODSP) += aarch64/videodsp_init.o
OBJS-$(CONFIG_OPUS_DECODER) += aarch64/opus_imdct_init.o
OBJS-$(CONFIG_RV40_DECODER) += aarch64/rv40dsp_init_aarch64.o
OBJS-$(CONFIG_VC1_DECODER) += aarch64/vc1dsp_init_aarch64.o
OBJS-$(CONFIG_VORBIS_DECODER) += aarch64/vorbisdsp_init.o
......@@ -23,4 +24,5 @@ NEON-OBJS-$(CONFIG_HPELDSP) += aarch64/hpeldsp_neon.o
NEON-OBJS-$(CONFIG_MPEGAUDIODSP) += aarch64/mpegaudiodsp_neon.o
NEON-OBJS-$(CONFIG_MDCT) += aarch64/mdct_neon.o
NEON-OBJS-$(CONFIG_OPUS_DECODER) += aarch64/opus_imdct_neon.o
NEON-OBJS-$(CONFIG_VORBIS_DECODER) += aarch64/vorbisdsp_neon.o
/*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_AARCH64_ASM_OFFSETS_H
#define AVCODEC_AARCH64_ASM_OFFSETS_H
/* CeltIMDCTContext */
#define CELT_EXPTAB 0x20
#define CELT_FFT_N 0x00
#define CELT_LEN2 0x04
#define CELT_LEN4 (CELT_LEN2 + 0x4) // loaded as pair
#define CELT_TMP 0x10
#define CELT_TWIDDLE (CELT_TMP + 0x8) // loaded as pair
#endif /* AVCODEC_AARCH64_ASM_OFFSETS_H */
/*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stddef.h>
#include "libavutil/cpu.h"
#include "libavutil/aarch64/cpu.h"
#include "libavutil/internal.h"
#include "libavcodec/opus_imdct.h"
#include "asm-offsets.h"
AV_CHECK_OFFSET(CeltIMDCTContext, exptab, CELT_EXPTAB);
AV_CHECK_OFFSET(CeltIMDCTContext, fft_n, CELT_FFT_N);
AV_CHECK_OFFSET(CeltIMDCTContext, len2, CELT_LEN2);
AV_CHECK_OFFSET(CeltIMDCTContext, len4, CELT_LEN4);
AV_CHECK_OFFSET(CeltIMDCTContext, tmp, CELT_TMP);
AV_CHECK_OFFSET(CeltIMDCTContext, twiddle_exptab, CELT_TWIDDLE);
void ff_celt_imdct_half_neon(CeltIMDCTContext *s, float *dst, const float *src,
ptrdiff_t stride, float scale);
void ff_celt_imdct_init_aarch64(CeltIMDCTContext *s)
{
int cpu_flags = av_get_cpu_flags();
if (have_neon(cpu_flags)) {
s->imdct_half = ff_celt_imdct_half_neon;
}
}
This diff is collapsed.
......@@ -92,8 +92,6 @@ typedef struct OpusRangeCoder {
typedef struct SilkContext SilkContext;
typedef struct CeltIMDCTContext CeltIMDCTContext;
typedef struct CeltContext CeltContext;
typedef struct OpusPacket {
......@@ -398,22 +396,6 @@ int ff_silk_decode_superframe(SilkContext *s, OpusRangeCoder *rc,
enum OpusBandwidth bandwidth, int coded_channels,
int duration_ms);
/**
* Init an iMDCT of the length 2 * 15 * (2^N)
*/
int ff_celt_imdct_init(CeltIMDCTContext **s, int N);
/**
* Free an iMDCT.
*/
void ff_celt_imdct_uninit(CeltIMDCTContext **s);
/**
* Calculate the middle half of the iMDCT
*/
void ff_celt_imdct_half(CeltIMDCTContext *s, float *dst, const float *src,
int src_stride, float scale);
int ff_celt_init(AVCodecContext *avctx, CeltContext **s, int output_channels);
void ff_celt_free(CeltContext **s);
......
......@@ -29,6 +29,7 @@
#include "libavutil/float_dsp.h"
#include "opus.h"
#include "opus_imdct.h"
enum CeltSpread {
CELT_SPREAD_NONE,
......@@ -2095,8 +2096,8 @@ int ff_celt_decode_frame(CeltContext *s, OpusRangeCoder *rc,
for (j = 0; j < s->blocks; j++) {
float *dst = frame->buf + 1024 + j * s->blocksize;
ff_celt_imdct_half(imdct, dst + CELT_OVERLAP / 2, s->coeffs[i] + j,
s->blocks, imdct_scale);
imdct->imdct_half(imdct, dst + CELT_OVERLAP / 2, s->coeffs[i] + j,
s->blocks, imdct_scale);
s->dsp.vector_fmul_window(dst, dst, dst + CELT_OVERLAP / 2,
celt_window, CELT_OVERLAP / 2);
}
......
......@@ -25,12 +25,19 @@
#include <float.h>
#include <math.h>
#include <stddef.h>
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/common.h"
#include "fft.h"
#include "avfft.h"
#include "opus.h"
#include "opus_imdct.h"
// minimal iMDCT size to make SIMD opts easier
#define CELT_MIN_IMDCT_SIZE 120
// complex c = a * b
#define CMUL3(cre, cim, are, aim, bre, bim) \
......@@ -59,18 +66,6 @@ do { \
(d).im = -ri + ir; \
} while (0)
struct CeltIMDCTContext {
int fft_n;
int len2;
int len4;
FFTComplex *tmp;
FFTComplex *twiddle_exptab;
FFTComplex *exptab[6];
};
av_cold void ff_celt_imdct_uninit(CeltIMDCTContext **ps)
{
CeltIMDCTContext *s = *ps;
......@@ -89,6 +84,9 @@ av_cold void ff_celt_imdct_uninit(CeltIMDCTContext **ps)
av_freep(ps);
}
static void celt_imdct_half(CeltIMDCTContext *s, float *dst, const float *src,
ptrdiff_t stride, float scale);
av_cold int ff_celt_imdct_init(CeltIMDCTContext **ps, int N)
{
CeltIMDCTContext *s;
......@@ -96,7 +94,7 @@ av_cold int ff_celt_imdct_init(CeltIMDCTContext **ps, int N)
int len = 2 * len2;
int i, j;
if (len2 > CELT_MAX_FRAME_SIZE)
if (len2 > CELT_MAX_FRAME_SIZE || len2 < CELT_MIN_IMDCT_SIZE)
return AVERROR(EINVAL);
s = av_mallocz(sizeof(*s));
......@@ -136,6 +134,11 @@ av_cold int ff_celt_imdct_init(CeltIMDCTContext **ps, int N)
for (j = 15; j < 19; j++)
s->exptab[0][j] = s->exptab[0][j - 15];
s->imdct_half = celt_imdct_half;
if (ARCH_AARCH64)
ff_celt_imdct_init_aarch64(s);
*ps = s;
return 0;
......@@ -144,7 +147,7 @@ fail:
return AVERROR(ENOMEM);
}
static void fft5(FFTComplex *out, const FFTComplex *in, int stride)
static void fft5(FFTComplex *out, const FFTComplex *in, ptrdiff_t stride)
{
// [0] = exp(2 * i * pi / 5), [1] = exp(2 * i * pi * 2 / 5)
static const FFTComplex fact[] = { { 0.30901699437494745, 0.95105651629515353 },
......@@ -177,7 +180,7 @@ static void fft5(FFTComplex *out, const FFTComplex *in, int stride)
out[4].im = in[0].im + z[0][3].im + z[1][2].im + z[2][1].im + z[3][0].im;
}
static void fft15(CeltIMDCTContext *s, FFTComplex *out, const FFTComplex *in, int stride)
static void fft15(CeltIMDCTContext *s, FFTComplex *out, const FFTComplex *in, ptrdiff_t stride)
{
const FFTComplex *exptab = s->exptab[0];
FFTComplex tmp[5];
......@@ -212,7 +215,8 @@ static void fft15(CeltIMDCTContext *s, FFTComplex *out, const FFTComplex *in, in
/*
* FFT of the length 15 * (2^N)
*/
static void fft_calc(CeltIMDCTContext *s, FFTComplex *out, const FFTComplex *in, int N, int stride)
static void fft_calc(CeltIMDCTContext *s, FFTComplex *out, const FFTComplex *in,
int N, ptrdiff_t stride)
{
if (N) {
const FFTComplex *exptab = s->exptab[N];
......@@ -237,8 +241,8 @@ static void fft_calc(CeltIMDCTContext *s, FFTComplex *out, const FFTComplex *in,
fft15(s, out, in, stride);
}
void ff_celt_imdct_half(CeltIMDCTContext *s, float *dst, const float *src,
int stride, float scale)
static void celt_imdct_half(CeltIMDCTContext *s, float *dst, const float *src,
ptrdiff_t stride, float scale)
{
FFTComplex *z = (FFTComplex *)dst;
const int len8 = s->len4 / 2;
......
/*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_OPUS_IMDCT_H
#define AVCODEC_OPUS_IMDCT_H
#include <stddef.h>
#include "avfft.h"
typedef struct CeltIMDCTContext {
int fft_n;
int len2;
int len4;
FFTComplex *tmp;
FFTComplex *twiddle_exptab;
FFTComplex *exptab[6];
/**
* Calculate the middle half of the iMDCT
*/
void (*imdct_half)(struct CeltIMDCTContext *s, float *dst, const float *src,
ptrdiff_t src_stride, float scale);
} CeltIMDCTContext;
/**
* Init an iMDCT of the length 2 * 15 * (2^N)
*/
int ff_celt_imdct_init(CeltIMDCTContext **s, int N);
/**
* Free an iMDCT.
*/
void ff_celt_imdct_uninit(CeltIMDCTContext **s);
void ff_celt_imdct_init_aarch64(CeltIMDCTContext *s);
#endif /* AVCODEC_OPUS_IMDCT_H */
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment