mpegaudiodec.c 80.1 KB
Newer Older
Fabrice Bellard's avatar
Fabrice Bellard committed
1 2
/*
 * MPEG Audio decoder
Fabrice Bellard's avatar
Fabrice Bellard committed
3
 * Copyright (c) 2001, 2002 Fabrice Bellard.
Fabrice Bellard's avatar
Fabrice Bellard committed
4
 *
5 6 7
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
Fabrice Bellard's avatar
Fabrice Bellard committed
8 9
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
Fabrice Bellard's avatar
Fabrice Bellard committed
11
 *
12
 * FFmpeg is distributed in the hope that it will be useful,
Fabrice Bellard's avatar
Fabrice Bellard committed
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Fabrice Bellard's avatar
Fabrice Bellard committed
14 15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
Fabrice Bellard's avatar
Fabrice Bellard committed
16
 *
Fabrice Bellard's avatar
Fabrice Bellard committed
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with FFmpeg; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Fabrice Bellard's avatar
Fabrice Bellard committed
20
 */
Michael Niedermayer's avatar
Michael Niedermayer committed
21 22 23 24

/**
 * @file mpegaudiodec.c
 * MPEG Audio decoder.
25
 */
Michael Niedermayer's avatar
Michael Niedermayer committed
26

27
//#define DEBUG
Fabrice Bellard's avatar
Fabrice Bellard committed
28
#include "avcodec.h"
29
#include "bitstream.h"
30
#include "dsputil.h"
Fabrice Bellard's avatar
Fabrice Bellard committed
31 32

/*
33 34 35
 * TODO:
 *  - in low precision mode, use more 16 bit multiplies in synth filter
 *  - test lsf / mpeg25 extensively.
Fabrice Bellard's avatar
Fabrice Bellard committed
36 37
 */

38 39
/* define USE_HIGHPRECISION to have a bit exact (but slower) mpeg
   audio decoder */
40
#ifdef CONFIG_MPEGAUDIO_HP
41
#   define USE_HIGHPRECISION
42
#endif
43

Roberto Togni's avatar
Roberto Togni committed
44
#include "mpegaudio.h"
45
#include "mpegaudiodecheader.h"
46

Luca Barbato's avatar
Luca Barbato committed
47 48
#include "mathops.h"

49 50 51 52
/* WARNING: only correct for posititive numbers */
#define FIXR(a)   ((int)((a) * FRAC_ONE + 0.5))
#define FRAC_RND(a) (((a) + (FRAC_ONE/2)) >> FRAC_BITS)

Michael Niedermayer's avatar
Michael Niedermayer committed
53 54
#define FIXHR(a) ((int)((a) * (1LL<<32) + 0.5))

55 56
/****************/

Fabrice Bellard's avatar
Fabrice Bellard committed
57 58
#define HEADER_SIZE 4

59 60 61 62 63 64 65 66 67
/**
 * Context for MP3On4 decoder
 */
typedef struct MP3On4DecodeContext {
    int frames;   ///< number of mp3 frames per block (number of mp3 decoder instances)
    int chan_cfg; ///< channel config number
    MPADecodeContext *mp3decctx[5]; ///< MPADecodeContext for every decoder instance
} MP3On4DecodeContext;

68 69
/* layer 3 "granule" */
typedef struct GranuleDef {
70
    uint8_t scfsi;
71 72 73 74
    int part2_3_length;
    int big_values;
    int global_gain;
    int scalefac_compress;
75 76
    uint8_t block_type;
    uint8_t switch_point;
77 78
    int table_select[3];
    int subblock_gain[3];
79 80
    uint8_t scalefac_scale;
    uint8_t count1table_select;
81 82 83
    int region_size[3]; /* number of huffman codes in each region */
    int preflag;
    int short_start, long_end; /* long/short band indexes */
84 85
    uint8_t scale_factors[40];
    int32_t sb_hybrid[SBLIMIT * 18]; /* 576 samples */
86
} GranuleDef;
Fabrice Bellard's avatar
Fabrice Bellard committed
87

88
#include "mpegaudiodata.h"
89 90
#include "mpegaudiodectab.h"

91 92 93
static void compute_antialias_integer(MPADecodeContext *s, GranuleDef *g);
static void compute_antialias_float(MPADecodeContext *s, GranuleDef *g);

94
/* vlc structure for decoding layer 3 huffman tables */
95
static VLC huff_vlc[16];
96 97
static VLC huff_quad_vlc[2];
/* computed from band_size_long */
98
static uint16_t band_index_long[9][23];
99
/* XXX: free when all decoders are closed */
100
#define TABLE_4_3_SIZE (8191 + 16)*4
101 102
static int8_t  table_4_3_exp[TABLE_4_3_SIZE];
static uint32_t table_4_3_value[TABLE_4_3_SIZE];
103
static uint32_t exp_table[512];
104
static uint32_t expval_table[512][16];
105
/* intensity stereo coef table */
106 107
static int32_t is_table[2][16];
static int32_t is_table_lsf[2][2][16];
108 109
static int32_t csa_table[8][4];
static float csa_table_float[8][4];
110
static int32_t mdct_win[8][36];
111 112

/* lower 2 bits: modulo 3, higher bits: shift */
113
static uint16_t scale_factor_modshift[64];
114
/* [i][j]:  2^(-j/3) * FRAC_ONE * 2^(i+2) / (2^(i+2) - 1) */
115
static int32_t scale_factor_mult[15][3];
116 117 118 119 120
/* mult table for layer 2 group quantization */

#define SCALE_GEN(v) \
{ FIXR(1.0 * (v)), FIXR(0.7937005259 * (v)), FIXR(0.6299605249 * (v)) }

Michael Niedermayer's avatar
Michael Niedermayer committed
121
static const int32_t scale_factor_mult2[3][3] = {
122 123 124
    SCALE_GEN(4.0 / 3.0), /* 3 steps */
    SCALE_GEN(4.0 / 5.0), /* 5 steps */
    SCALE_GEN(4.0 / 9.0), /* 9 steps */
125 126
};

127
static DECLARE_ALIGNED_16(MPA_INT, window[512]);
128

129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190
/**
 * Convert region offsets to region sizes and truncate
 * size to big_values.
 */
void ff_region_offset2size(GranuleDef *g){
    int i, k, j=0;
    g->region_size[2] = (576 / 2);
    for(i=0;i<3;i++) {
        k = FFMIN(g->region_size[i], g->big_values);
        g->region_size[i] = k - j;
        j = k;
    }
}

void ff_init_short_region(MPADecodeContext *s, GranuleDef *g){
    if (g->block_type == 2)
        g->region_size[0] = (36 / 2);
    else {
        if (s->sample_rate_index <= 2)
            g->region_size[0] = (36 / 2);
        else if (s->sample_rate_index != 8)
            g->region_size[0] = (54 / 2);
        else
            g->region_size[0] = (108 / 2);
    }
    g->region_size[1] = (576 / 2);
}

void ff_init_long_region(MPADecodeContext *s, GranuleDef *g, int ra1, int ra2){
    int l;
    g->region_size[0] =
        band_index_long[s->sample_rate_index][ra1 + 1] >> 1;
    /* should not overflow */
    l = FFMIN(ra1 + ra2 + 2, 22);
    g->region_size[1] =
        band_index_long[s->sample_rate_index][l] >> 1;
}

void ff_compute_band_indexes(MPADecodeContext *s, GranuleDef *g){
    if (g->block_type == 2) {
        if (g->switch_point) {
            /* if switched mode, we handle the 36 first samples as
                long blocks.  For 8000Hz, we handle the 48 first
                exponents as long blocks (XXX: check this!) */
            if (s->sample_rate_index <= 2)
                g->long_end = 8;
            else if (s->sample_rate_index != 8)
                g->long_end = 6;
            else
                g->long_end = 4; /* 8000 Hz */

            g->short_start = 2 + (s->sample_rate_index != 8);
        } else {
            g->long_end = 0;
            g->short_start = 0;
        }
    } else {
        g->short_start = 13;
        g->long_end = 22;
    }
}

191 192 193 194 195
/* layer 1 unscaling */
/* n = number of bits of the mantissa minus 1 */
static inline int l1_unscale(int n, int mant, int scale_factor)
{
    int shift, mod;
196
    int64_t val;
197 198 199 200 201 202

    shift = scale_factor_modshift[scale_factor];
    mod = shift & 3;
    shift >>= 2;
    val = MUL64(mant + (-1 << n) + 1, scale_factor_mult[n-1][mod]);
    shift += n;
203 204
    /* NOTE: at this point, 1 <= shift >= 21 + 15 */
    return (int)((val + (1LL << (shift - 1))) >> shift);
205 206 207 208 209 210 211 212 213
}

static inline int l2_unscale_group(int steps, int mant, int scale_factor)
{
    int shift, mod, val;

    shift = scale_factor_modshift[scale_factor];
    mod = shift & 3;
    shift >>= 2;
214 215 216 217 218 219

    val = (mant - (steps >> 1)) * scale_factor_mult2[steps >> 2][mod];
    /* NOTE: at this point, 0 <= shift <= 21 */
    if (shift > 0)
        val = (val + (1 << (shift - 1))) >> shift;
    return val;
220 221 222 223 224 225 226 227
}

/* compute value^(4/3) * 2^(exponent/4). It normalized to FRAC_BITS */
static inline int l3_unscale(int value, int exponent)
{
    unsigned int m;
    int e;

228 229 230 231
    e = table_4_3_exp  [4*value + (exponent&3)];
    m = table_4_3_value[4*value + (exponent&3)];
    e -= (exponent >> 2);
    assert(e>=1);
232
    if (e > 31)
233
        return 0;
234
    m = (m + (1 << (e-1))) >> e;
235

236 237 238
    return m;
}

239 240 241 242 243 244
/* all integer n^(4/3) computation code */
#define DEV_ORDER 13

#define POW_FRAC_BITS 24
#define POW_FRAC_ONE    (1 << POW_FRAC_BITS)
#define POW_FIX(a)   ((int)((a) * POW_FRAC_ONE))
245
#define POW_MULL(a,b) (((int64_t)(a) * (int64_t)(b)) >> POW_FRAC_BITS)
246 247 248

static int dev_4_3_coefs[DEV_ORDER];

249
#if 0 /* unused */
250 251 252 253 254
static int pow_mult3[3] = {
    POW_FIX(1.0),
    POW_FIX(1.25992104989487316476),
    POW_FIX(1.58740105196819947474),
};
255
#endif
256 257 258 259 260 261 262 263 264 265 266 267

static void int_pow_init(void)
{
    int i, a;

    a = POW_FIX(1.0);
    for(i=0;i<DEV_ORDER;i++) {
        a = POW_MULL(a, POW_FIX(4.0 / 3.0) - i * POW_FIX(1.0)) / (i + 1);
        dev_4_3_coefs[i] = a;
    }
}

268
#if 0 /* unused, remove? */
269 270 271 272 273
/* return the mantissa and the binary exponent */
static int int_pow(int i, int *exp_ptr)
{
    int e, er, eq, j;
    int a, a1;
274

275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300
    /* renormalize */
    a = i;
    e = POW_FRAC_BITS;
    while (a < (1 << (POW_FRAC_BITS - 1))) {
        a = a << 1;
        e--;
    }
    a -= (1 << POW_FRAC_BITS);
    a1 = 0;
    for(j = DEV_ORDER - 1; j >= 0; j--)
        a1 = POW_MULL(a, dev_4_3_coefs[j] + a1);
    a = (1 << POW_FRAC_BITS) + a1;
    /* exponent compute (exact) */
    e = e * 4;
    er = e % 3;
    eq = e / 3;
    a = POW_MULL(a, pow_mult3[er]);
    while (a >= 2 * POW_FRAC_ONE) {
        a = a >> 1;
        eq++;
    }
    /* convert to float */
    while (a < POW_FRAC_ONE) {
        a = a << 1;
        eq--;
    }
301
    /* now POW_FRAC_ONE <= a < 2 * POW_FRAC_ONE */
302
#if POW_FRAC_BITS > FRAC_BITS
303 304 305 306 307 308 309
    a = (a + (1 << (POW_FRAC_BITS - FRAC_BITS - 1))) >> (POW_FRAC_BITS - FRAC_BITS);
    /* correct overflow */
    if (a >= 2 * (1 << FRAC_BITS)) {
        a = a >> 1;
        eq++;
    }
#endif
310 311 312
    *exp_ptr = eq;
    return a;
}
313
#endif
Fabrice Bellard's avatar
Fabrice Bellard committed
314 315 316 317

static int decode_init(AVCodecContext * avctx)
{
    MPADecodeContext *s = avctx->priv_data;
318
    static int init=0;
319
    int i, j, k;
Fabrice Bellard's avatar
Fabrice Bellard committed
320

Michel Bardiaux's avatar
Michel Bardiaux committed
321 322
    s->avctx = avctx;

323
#if defined(USE_HIGHPRECISION) && defined(CONFIG_AUDIO_NONSHORT)
324 325 326
    avctx->sample_fmt= SAMPLE_FMT_S32;
#else
    avctx->sample_fmt= SAMPLE_FMT_S16;
327
#endif
328
    s->error_resilience= avctx->error_resilience;
329

Michael Niedermayer's avatar
Michael Niedermayer committed
330
    if(avctx->antialias_algo != FF_AA_FLOAT)
Michael Niedermayer's avatar
10l  
Michael Niedermayer committed
331 332 333 334
        s->compute_antialias= compute_antialias_integer;
    else
        s->compute_antialias= compute_antialias_float;

335
    if (!init && !avctx->parse_only) {
336 337 338 339
        /* scale factors table for layer 1/2 */
        for(i=0;i<64;i++) {
            int shift, mod;
            /* 1.0 (i = 3) is normalized to 2 ^ FRAC_BITS */
340
            shift = (i / 3);
341 342 343 344 345 346 347 348
            mod = i % 3;
            scale_factor_modshift[i] = mod | (shift << 2);
        }

        /* scale factor multiply for layer 1 */
        for(i=0;i<15;i++) {
            int n, norm;
            n = i + 2;
349
            norm = ((INT64_C(1) << n) * FRAC_ONE) / ((1 << n) - 1);
350 351 352
            scale_factor_mult[i][0] = MULL(FIXR(1.0 * 2.0), norm);
            scale_factor_mult[i][1] = MULL(FIXR(0.7937005259 * 2.0), norm);
            scale_factor_mult[i][2] = MULL(FIXR(0.6299605249 * 2.0), norm);
Michel Bardiaux's avatar
Michel Bardiaux committed
353
            dprintf(avctx, "%d: norm=%x s=%x %x %x\n",
354
                    i, norm,
355 356 357 358
                    scale_factor_mult[i][0],
                    scale_factor_mult[i][1],
                    scale_factor_mult[i][2]);
        }
359

360
        ff_mpa_synth_init(window);
361

362 363 364
        /* huffman decode tables */
        for(i=1;i<16;i++) {
            const HuffTable *h = &mpa_huff_tables[i];
365
            int xsize, x, y;
366
            unsigned int n;
Michael Niedermayer's avatar
Michael Niedermayer committed
367 368
            uint8_t  tmp_bits [512];
            uint16_t tmp_codes[512];
Michael Niedermayer's avatar
Michael Niedermayer committed
369 370 371

            memset(tmp_bits , 0, sizeof(tmp_bits ));
            memset(tmp_codes, 0, sizeof(tmp_codes));
372 373 374

            xsize = h->xsize;
            n = xsize * xsize;
375

376 377
            j = 0;
            for(x=0;x<xsize;x++) {
Michael Niedermayer's avatar
Michael Niedermayer committed
378
                for(y=0;y<xsize;y++){
Michael Niedermayer's avatar
Michael Niedermayer committed
379 380
                    tmp_bits [(x << 5) | y | ((x&&y)<<4)]= h->bits [j  ];
                    tmp_codes[(x << 5) | y | ((x&&y)<<4)]= h->codes[j++];
Michael Niedermayer's avatar
Michael Niedermayer committed
381
                }
382
            }
Michael Niedermayer's avatar
Michael Niedermayer committed
383 384

            /* XXX: fail test */
Michael Niedermayer's avatar
Michael Niedermayer committed
385
            init_vlc(&huff_vlc[i], 7, 512,
Michael Niedermayer's avatar
Michael Niedermayer committed
386
                     tmp_bits, 1, 1, tmp_codes, 2, 2, 1);
387 388
        }
        for(i=0;i<2;i++) {
389
            init_vlc(&huff_quad_vlc[i], i == 0 ? 7 : 4, 16,
390
                     mpa_quad_bits[i], 1, 1, mpa_quad_codes[i], 1, 1, 1);
391 392 393 394 395 396 397 398 399 400 401
        }

        for(i=0;i<9;i++) {
            k = 0;
            for(j=0;j<22;j++) {
                band_index_long[i][j] = k;
                k += band_size_long[i][j];
            }
            band_index_long[i][22] = k;
        }

402
        /* compute n ^ (4/3) and store it in mantissa/exp format */
403

404
        int_pow_init();
405
        for(i=1;i<TABLE_4_3_SIZE;i++) {
406
            double f, fm;
407
            int e, m;
408 409
            f = pow((double)(i/4), 4.0 / 3.0) * pow(2, (i&3)*0.25);
            fm = frexp(f, &e);
410
            m = (uint32_t)(fm*(1LL<<31) + 0.5);
411
            e+= FRAC_BITS - 31 + 5 - 100;
412

413 414
            /* normalized to FRAC_BITS */
            table_4_3_value[i] = m;
415 416
//            av_log(NULL, AV_LOG_DEBUG, "%d %d %f\n", i, m, pow((double)i, 4.0 / 3.0));
            table_4_3_exp[i] = -e;
417
        }
418
        for(i=0; i<512*16; i++){
419 420
            int exponent= (i>>4);
            double f= pow(i&15, 4.0 / 3.0) * pow(2, (exponent-400)*0.25 + FRAC_BITS + 5);
421
            expval_table[exponent][i&15]= llrint(f);
422
            if((i&15)==1)
423
                exp_table[exponent]= llrint(f);
424
        }
425

426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451
        for(i=0;i<7;i++) {
            float f;
            int v;
            if (i != 6) {
                f = tan((double)i * M_PI / 12.0);
                v = FIXR(f / (1.0 + f));
            } else {
                v = FIXR(1.0);
            }
            is_table[0][i] = v;
            is_table[1][6 - i] = v;
        }
        /* invalid values */
        for(i=7;i<16;i++)
            is_table[0][i] = is_table[1][i] = 0.0;

        for(i=0;i<16;i++) {
            double f;
            int e, k;

            for(j=0;j<2;j++) {
                e = -(j + 1) * ((i + 1) >> 1);
                f = pow(2.0, e / 4.0);
                k = i & 1;
                is_table_lsf[j][k ^ 1][i] = FIXR(f);
                is_table_lsf[j][k][i] = FIXR(1.0);
Michel Bardiaux's avatar
Michel Bardiaux committed
452
                dprintf(avctx, "is_table_lsf %d %d: %x %x\n",
453 454 455 456 457 458 459 460 461
                        i, j, is_table_lsf[j][0][i], is_table_lsf[j][1][i]);
            }
        }

        for(i=0;i<8;i++) {
            float ci, cs, ca;
            ci = ci_table[i];
            cs = 1.0 / sqrt(1.0 + ci * ci);
            ca = cs * ci;
Michael Niedermayer's avatar
Michael Niedermayer committed
462 463 464
            csa_table[i][0] = FIXHR(cs/4);
            csa_table[i][1] = FIXHR(ca/4);
            csa_table[i][2] = FIXHR(ca/4) + FIXHR(cs/4);
465
            csa_table[i][3] = FIXHR(ca/4) - FIXHR(cs/4);
466 467 468
            csa_table_float[i][0] = cs;
            csa_table_float[i][1] = ca;
            csa_table_float[i][2] = ca + cs;
469
            csa_table_float[i][3] = ca - cs;
470
//            printf("%d %d %d %d\n", FIX(cs), FIX(cs-1), FIX(ca), FIX(cs)-FIX(ca));
Michael Niedermayer's avatar
Michael Niedermayer committed
471
//            av_log(NULL, AV_LOG_DEBUG,"%f %f %f %f\n", cs, ca, ca+cs, ca-cs);
472 473 474 475
        }

        /* compute mdct windows */
        for(i=0;i<36;i++) {
Michael Niedermayer's avatar
Michael Niedermayer committed
476 477
            for(j=0; j<4; j++){
                double d;
478

Michael Niedermayer's avatar
Michael Niedermayer committed
479 480
                if(j==2 && i%3 != 1)
                    continue;
481

Michael Niedermayer's avatar
Michael Niedermayer committed
482 483 484 485 486 487 488 489 490 491 492
                d= sin(M_PI * (i + 0.5) / 36.0);
                if(j==1){
                    if     (i>=30) d= 0;
                    else if(i>=24) d= sin(M_PI * (i - 18 + 0.5) / 12.0);
                    else if(i>=18) d= 1;
                }else if(j==3){
                    if     (i<  6) d= 0;
                    else if(i< 12) d= sin(M_PI * (i -  6 + 0.5) / 12.0);
                    else if(i< 18) d= 1;
                }
                //merge last stage of imdct into the window coefficients
Michael Niedermayer's avatar
Michael Niedermayer committed
493 494 495 496 497 498
                d*= 0.5 / cos(M_PI*(2*i + 19)/72);

                if(j==2)
                    mdct_win[j][i/3] = FIXHR((d / (1<<5)));
                else
                    mdct_win[j][i  ] = FIXHR((d / (1<<5)));
Michael Niedermayer's avatar
Michael Niedermayer committed
499 500
//                av_log(NULL, AV_LOG_DEBUG, "%2d %d %f\n", i,j,d / (1<<5));
            }
501 502 503 504 505 506 507 508 509 510 511 512 513
        }

        /* NOTE: we do frequency inversion adter the MDCT by changing
           the sign of the right window coefs */
        for(j=0;j<4;j++) {
            for(i=0;i<36;i+=2) {
                mdct_win[j + 4][i] = mdct_win[j][i];
                mdct_win[j + 4][i + 1] = -mdct_win[j][i + 1];
            }
        }

#if defined(DEBUG)
        for(j=0;j<8;j++) {
514
            av_log(avctx, AV_LOG_DEBUG, "win%d=\n", j);
515
            for(i=0;i<36;i++)
516 517
                av_log(avctx, AV_LOG_DEBUG, "%f, ", (double)mdct_win[j][i] / FRAC_ONE);
            av_log(avctx, AV_LOG_DEBUG, "\n");
518 519
        }
#endif
Fabrice Bellard's avatar
Fabrice Bellard committed
520 521 522
        init = 1;
    }

523 524 525
#ifdef DEBUG
    s->frame_count = 0;
#endif
Roberto Togni's avatar
Roberto Togni committed
526 527
    if (avctx->codec_id == CODEC_ID_MP3ADU)
        s->adu_mode = 1;
Fabrice Bellard's avatar
Fabrice Bellard committed
528 529 530
    return 0;
}

531
/* tab[i][j] = 1.0 / (2.0 * cos(pi*(2*k+1) / 2^(6 - j))) */
532 533 534

/* cos(i*pi/64) */

535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569
#define COS0_0  FIXHR(0.50060299823519630134/2)
#define COS0_1  FIXHR(0.50547095989754365998/2)
#define COS0_2  FIXHR(0.51544730992262454697/2)
#define COS0_3  FIXHR(0.53104259108978417447/2)
#define COS0_4  FIXHR(0.55310389603444452782/2)
#define COS0_5  FIXHR(0.58293496820613387367/2)
#define COS0_6  FIXHR(0.62250412303566481615/2)
#define COS0_7  FIXHR(0.67480834145500574602/2)
#define COS0_8  FIXHR(0.74453627100229844977/2)
#define COS0_9  FIXHR(0.83934964541552703873/2)
#define COS0_10 FIXHR(0.97256823786196069369/2)
#define COS0_11 FIXHR(1.16943993343288495515/4)
#define COS0_12 FIXHR(1.48416461631416627724/4)
#define COS0_13 FIXHR(2.05778100995341155085/8)
#define COS0_14 FIXHR(3.40760841846871878570/8)
#define COS0_15 FIXHR(10.19000812354805681150/32)

#define COS1_0 FIXHR(0.50241928618815570551/2)
#define COS1_1 FIXHR(0.52249861493968888062/2)
#define COS1_2 FIXHR(0.56694403481635770368/2)
#define COS1_3 FIXHR(0.64682178335999012954/2)
#define COS1_4 FIXHR(0.78815462345125022473/2)
#define COS1_5 FIXHR(1.06067768599034747134/4)
#define COS1_6 FIXHR(1.72244709823833392782/4)
#define COS1_7 FIXHR(5.10114861868916385802/16)

#define COS2_0 FIXHR(0.50979557910415916894/2)
#define COS2_1 FIXHR(0.60134488693504528054/2)
#define COS2_2 FIXHR(0.89997622313641570463/2)
#define COS2_3 FIXHR(2.56291544774150617881/8)

#define COS3_0 FIXHR(0.54119610014619698439/2)
#define COS3_1 FIXHR(1.30656296487637652785/4)

#define COS4_0 FIXHR(0.70710678118654752439/2)
570 571

/* butterfly operator */
572
#define BF(a, b, c, s)\
573 574 575 576
{\
    tmp0 = tab[a] + tab[b];\
    tmp1 = tab[a] - tab[b];\
    tab[a] = tmp0;\
577
    tab[b] = MULH(tmp1<<(s), c);\
578 579 580 581
}

#define BF1(a, b, c, d)\
{\
582 583
    BF(a, b, COS4_0, 1);\
    BF(c, d,-COS4_0, 1);\
584 585 586 587 588
    tab[c] += tab[d];\
}

#define BF2(a, b, c, d)\
{\
589 590
    BF(a, b, COS4_0, 1);\
    BF(c, d,-COS4_0, 1);\
591 592 593 594 595 596 597 598 599
    tab[c] += tab[d];\
    tab[a] += tab[c];\
    tab[c] += tab[b];\
    tab[b] += tab[d];\
}

#define ADD(a, b) tab[a] += tab[b]

/* DCT32 without 1/sqrt(2) coef zero scaling. */
600
static void dct32(int32_t *out, int32_t *tab)
601 602 603 604
{
    int tmp0, tmp1;

    /* pass 1 */
605 606
    BF( 0, 31, COS0_0 , 1);
    BF(15, 16, COS0_15, 5);
607
    /* pass 2 */
608 609
    BF( 0, 15, COS1_0 , 1);
    BF(16, 31,-COS1_0 , 1);
610
    /* pass 1 */
611 612
    BF( 7, 24, COS0_7 , 1);
    BF( 8, 23, COS0_8 , 1);
613
    /* pass 2 */
614 615
    BF( 7,  8, COS1_7 , 4);
    BF(23, 24,-COS1_7 , 4);
616
    /* pass 3 */
617 618 619 620
    BF( 0,  7, COS2_0 , 1);
    BF( 8, 15,-COS2_0 , 1);
    BF(16, 23, COS2_0 , 1);
    BF(24, 31,-COS2_0 , 1);
621
    /* pass 1 */
622 623
    BF( 3, 28, COS0_3 , 1);
    BF(12, 19, COS0_12, 2);
624
    /* pass 2 */
625 626
    BF( 3, 12, COS1_3 , 1);
    BF(19, 28,-COS1_3 , 1);
627
    /* pass 1 */
628 629
    BF( 4, 27, COS0_4 , 1);
    BF(11, 20, COS0_11, 2);
630
    /* pass 2 */
631 632
    BF( 4, 11, COS1_4 , 1);
    BF(20, 27,-COS1_4 , 1);
633
    /* pass 3 */
634 635 636 637
    BF( 3,  4, COS2_3 , 3);
    BF(11, 12,-COS2_3 , 3);
    BF(19, 20, COS2_3 , 3);
    BF(27, 28,-COS2_3 , 3);
638
    /* pass 4 */
639 640 641 642 643 644 645 646
    BF( 0,  3, COS3_0 , 1);
    BF( 4,  7,-COS3_0 , 1);
    BF( 8, 11, COS3_0 , 1);
    BF(12, 15,-COS3_0 , 1);
    BF(16, 19, COS3_0 , 1);
    BF(20, 23,-COS3_0 , 1);
    BF(24, 27, COS3_0 , 1);
    BF(28, 31,-COS3_0 , 1);
647

648 649 650


    /* pass 1 */
651 652
    BF( 1, 30, COS0_1 , 1);
    BF(14, 17, COS0_14, 3);
653
    /* pass 2 */
654 655
    BF( 1, 14, COS1_1 , 1);
    BF(17, 30,-COS1_1 , 1);
656
    /* pass 1 */
657 658
    BF( 6, 25, COS0_6 , 1);
    BF( 9, 22, COS0_9 , 1);
659
    /* pass 2 */
660 661
    BF( 6,  9, COS1_6 , 2);
    BF(22, 25,-COS1_6 , 2);
662
    /* pass 3 */
663 664 665 666
    BF( 1,  6, COS2_1 , 1);
    BF( 9, 14,-COS2_1 , 1);
    BF(17, 22, COS2_1 , 1);
    BF(25, 30,-COS2_1 , 1);
667

668
    /* pass 1 */
669 670
    BF( 2, 29, COS0_2 , 1);
    BF(13, 18, COS0_13, 3);
671
    /* pass 2 */
672 673
    BF( 2, 13, COS1_2 , 1);
    BF(18, 29,-COS1_2 , 1);
674
    /* pass 1 */
675 676
    BF( 5, 26, COS0_5 , 1);
    BF(10, 21, COS0_10, 1);
677
    /* pass 2 */
678 679
    BF( 5, 10, COS1_5 , 2);
    BF(21, 26,-COS1_5 , 2);
680
    /* pass 3 */
681 682 683 684
    BF( 2,  5, COS2_2 , 1);
    BF(10, 13,-COS2_2 , 1);
    BF(18, 21, COS2_2 , 1);
    BF(26, 29,-COS2_2 , 1);
685
    /* pass 4 */
686 687 688 689 690 691 692 693
    BF( 1,  2, COS3_1 , 2);
    BF( 5,  6,-COS3_1 , 2);
    BF( 9, 10, COS3_1 , 2);
    BF(13, 14,-COS3_1 , 2);
    BF(17, 18, COS3_1 , 2);
    BF(21, 22,-COS3_1 , 2);
    BF(25, 26, COS3_1 , 2);
    BF(29, 30,-COS3_1 , 2);
694

695
    /* pass 5 */
696 697 698
    BF1( 0,  1,  2,  3);
    BF2( 4,  5,  6,  7);
    BF1( 8,  9, 10, 11);
699 700 701 702 703
    BF2(12, 13, 14, 15);
    BF1(16, 17, 18, 19);
    BF2(20, 21, 22, 23);
    BF1(24, 25, 26, 27);
    BF2(28, 29, 30, 31);
704

705
    /* pass 6 */
706

707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730
    ADD( 8, 12);
    ADD(12, 10);
    ADD(10, 14);
    ADD(14,  9);
    ADD( 9, 13);
    ADD(13, 11);
    ADD(11, 15);

    out[ 0] = tab[0];
    out[16] = tab[1];
    out[ 8] = tab[2];
    out[24] = tab[3];
    out[ 4] = tab[4];
    out[20] = tab[5];
    out[12] = tab[6];
    out[28] = tab[7];
    out[ 2] = tab[8];
    out[18] = tab[9];
    out[10] = tab[10];
    out[26] = tab[11];
    out[ 6] = tab[12];
    out[22] = tab[13];
    out[14] = tab[14];
    out[30] = tab[15];
731

732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759
    ADD(24, 28);
    ADD(28, 26);
    ADD(26, 30);
    ADD(30, 25);
    ADD(25, 29);
    ADD(29, 27);
    ADD(27, 31);

    out[ 1] = tab[16] + tab[24];
    out[17] = tab[17] + tab[25];
    out[ 9] = tab[18] + tab[26];
    out[25] = tab[19] + tab[27];
    out[ 5] = tab[20] + tab[28];
    out[21] = tab[21] + tab[29];
    out[13] = tab[22] + tab[30];
    out[29] = tab[23] + tab[31];
    out[ 3] = tab[24] + tab[20];
    out[19] = tab[25] + tab[21];
    out[11] = tab[26] + tab[22];
    out[27] = tab[27] + tab[23];
    out[ 7] = tab[28] + tab[18];
    out[23] = tab[29] + tab[19];
    out[15] = tab[30] + tab[17];
    out[31] = tab[31];
}

#if FRAC_BITS <= 15

760
static inline int round_sample(int *sum)
761 762
{
    int sum1;
763 764
    sum1 = (*sum) >> OUT_SHIFT;
    *sum &= (1<<OUT_SHIFT)-1;
765 766 767 768
    if (sum1 < OUT_MIN)
        sum1 = OUT_MIN;
    else if (sum1 > OUT_MAX)
        sum1 = OUT_MAX;
769
    return sum1;
770 771
}

Luca Barbato's avatar
Luca Barbato committed
772 773
/* signed 16x16 -> 32 multiply add accumulate */
#define MACS(rt, ra, rb) MAC16(rt, ra, rb)
Siarhei Siamashka's avatar
Siarhei Siamashka committed
774

Luca Barbato's avatar
Luca Barbato committed
775 776
/* signed 16x16 -> 32 multiply */
#define MULS(ra, rb) MUL16(ra, rb)
777

778 779
#else

780
static inline int round_sample(int64_t *sum)
781 782
{
    int sum1;
783 784
    sum1 = (int)((*sum) >> OUT_SHIFT);
    *sum &= (1<<OUT_SHIFT)-1;
785 786 787 788
    if (sum1 < OUT_MIN)
        sum1 = OUT_MIN;
    else if (sum1 > OUT_MAX)
        sum1 = OUT_MAX;
789
    return sum1;
790 791
}

792
#   define MULS(ra, rb) MUL64(ra, rb)
793 794 795
#endif

#define SUM8(sum, op, w, p) \
796
{                                               \
797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833
    sum op MULS((w)[0 * 64], p[0 * 64]);\
    sum op MULS((w)[1 * 64], p[1 * 64]);\
    sum op MULS((w)[2 * 64], p[2 * 64]);\
    sum op MULS((w)[3 * 64], p[3 * 64]);\
    sum op MULS((w)[4 * 64], p[4 * 64]);\
    sum op MULS((w)[5 * 64], p[5 * 64]);\
    sum op MULS((w)[6 * 64], p[6 * 64]);\
    sum op MULS((w)[7 * 64], p[7 * 64]);\
}

#define SUM8P2(sum1, op1, sum2, op2, w1, w2, p) \
{                                               \
    int tmp;\
    tmp = p[0 * 64];\
    sum1 op1 MULS((w1)[0 * 64], tmp);\
    sum2 op2 MULS((w2)[0 * 64], tmp);\
    tmp = p[1 * 64];\
    sum1 op1 MULS((w1)[1 * 64], tmp);\
    sum2 op2 MULS((w2)[1 * 64], tmp);\
    tmp = p[2 * 64];\
    sum1 op1 MULS((w1)[2 * 64], tmp);\
    sum2 op2 MULS((w2)[2 * 64], tmp);\
    tmp = p[3 * 64];\
    sum1 op1 MULS((w1)[3 * 64], tmp);\
    sum2 op2 MULS((w2)[3 * 64], tmp);\
    tmp = p[4 * 64];\
    sum1 op1 MULS((w1)[4 * 64], tmp);\
    sum2 op2 MULS((w2)[4 * 64], tmp);\
    tmp = p[5 * 64];\
    sum1 op1 MULS((w1)[5 * 64], tmp);\
    sum2 op2 MULS((w2)[5 * 64], tmp);\
    tmp = p[6 * 64];\
    sum1 op1 MULS((w1)[6 * 64], tmp);\
    sum2 op2 MULS((w2)[6 * 64], tmp);\
    tmp = p[7 * 64];\
    sum1 op1 MULS((w1)[7 * 64], tmp);\
    sum2 op2 MULS((w2)[7 * 64], tmp);\
834 835
}

836 837 838 839 840 841 842
void ff_mpa_synth_init(MPA_INT *window)
{
    int i;

    /* max = 18760, max sum over all 16 coefs : 44736 */
    for(i=0;i<257;i++) {
        int v;
843
        v = ff_mpa_enwindow[i];
844 845 846 847 848 849 850 851
#if WFRAC_BITS < 16
        v = (v + (1 << (16 - WFRAC_BITS - 1))) >> (16 - WFRAC_BITS);
#endif
        window[i] = v;
        if ((i & 63) != 0)
            v = -v;
        if (i != 0)
            window[512 - i] = v;
852
    }
853
}
854 855 856 857

/* 32 sub band synthesis filter. Input: 32 sub band samples, Output:
   32 samples. */
/* XXX: optimize by avoiding ring buffer usage */
858
void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
859
                         MPA_INT *window, int *dither_state,
860
                         OUT_INT *samples, int incr,
861
                         int32_t sb_samples[SBLIMIT])
862
{
863
    int32_t tmp[32];
864
    register MPA_INT *synth_buf;
Alex Beregszaszi's avatar
Alex Beregszaszi committed
865
    register const MPA_INT *w, *w2, *p;
866
    int j, offset, v;
867
    OUT_INT *samples2;
868
#if FRAC_BITS <= 15
869
    int sum, sum2;
870
#else
871
    int64_t sum, sum2;
872
#endif
873

874
    dct32(tmp, sb_samples);
875

876 877
    offset = *synth_buf_offset;
    synth_buf = synth_buf_ptr + offset;
878 879 880 881

    for(j=0;j<32;j++) {
        v = tmp[j];
#if FRAC_BITS <= 15
882 883
        /* NOTE: can cause a loss in precision if very high amplitude
           sound */
884
        v = av_clip_int16(v);
885 886 887 888 889 890
#endif
        synth_buf[j] = v;
    }
    /* copy to avoid wrap */
    memcpy(synth_buf + 512, synth_buf, 32 * sizeof(MPA_INT));

891
    samples2 = samples + 31 * incr;
892
    w = window;
893 894
    w2 = window + 31;

895
    sum = *dither_state;
896 897 898 899
    p = synth_buf + 16;
    SUM8(sum, +=, w, p);
    p = synth_buf + 48;
    SUM8(sum, -=, w + 32, p);
900
    *samples = round_sample(&sum);
901
    samples += incr;
902 903
    w++;

904 905 906 907 908 909 910 911 912
    /* we calculate two samples at the same time to avoid one memory
       access per two sample */
    for(j=1;j<16;j++) {
        sum2 = 0;
        p = synth_buf + 16 + j;
        SUM8P2(sum, +=, sum2, -=, w, w2, p);
        p = synth_buf + 48 - j;
        SUM8P2(sum, -=, sum2, -=, w + 32, w2 + 32, p);

913
        *samples = round_sample(&sum);
914
        samples += incr;
915 916
        sum += sum2;
        *samples2 = round_sample(&sum);
917
        samples2 -= incr;
918
        w++;
919
        w2--;
920
    }
921

922 923
    p = synth_buf + 32;
    SUM8(sum, -=, w + 32, p);
924
    *samples = round_sample(&sum);
925
    *dither_state= sum;
926

927
    offset = (offset - 32) & 511;
928
    *synth_buf_offset = offset;