mpegaudiodec.c 78.1 KB
Newer Older
Fabrice Bellard's avatar
Fabrice Bellard committed
1 2
/*
 * MPEG Audio decoder
Fabrice Bellard's avatar
Fabrice Bellard committed
3
 * Copyright (c) 2001, 2002 Fabrice Bellard.
Fabrice Bellard's avatar
Fabrice Bellard committed
4
 *
5 6 7
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
Fabrice Bellard's avatar
Fabrice Bellard committed
8 9
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
Fabrice Bellard's avatar
Fabrice Bellard committed
11
 *
12
 * FFmpeg is distributed in the hope that it will be useful,
Fabrice Bellard's avatar
Fabrice Bellard committed
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Fabrice Bellard's avatar
Fabrice Bellard committed
14 15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
Fabrice Bellard's avatar
Fabrice Bellard committed
16
 *
Fabrice Bellard's avatar
Fabrice Bellard committed
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with FFmpeg; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Fabrice Bellard's avatar
Fabrice Bellard committed
20
 */
Michael Niedermayer's avatar
Michael Niedermayer committed
21 22 23 24

/**
 * @file mpegaudiodec.c
 * MPEG Audio decoder.
25
 */
Michael Niedermayer's avatar
Michael Niedermayer committed
26

Fabrice Bellard's avatar
Fabrice Bellard committed
27
#include "avcodec.h"
28
#include "bitstream.h"
29
#include "dsputil.h"
Fabrice Bellard's avatar
Fabrice Bellard committed
30 31

/*
32 33 34
 * TODO:
 *  - in low precision mode, use more 16 bit multiplies in synth filter
 *  - test lsf / mpeg25 extensively.
Fabrice Bellard's avatar
Fabrice Bellard committed
35 36
 */

37 38
/* define USE_HIGHPRECISION to have a bit exact (but slower) mpeg
   audio decoder */
39
#ifdef CONFIG_MPEGAUDIO_HP
40
#   define USE_HIGHPRECISION
41
#endif
42

Roberto Togni's avatar
Roberto Togni committed
43
#include "mpegaudio.h"
44
#include "mpegaudiodecheader.h"
45

Luca Barbato's avatar
Luca Barbato committed
46 47
#include "mathops.h"

48 49 50 51
/* WARNING: only correct for posititive numbers */
#define FIXR(a)   ((int)((a) * FRAC_ONE + 0.5))
#define FRAC_RND(a) (((a) + (FRAC_ONE/2)) >> FRAC_BITS)

Michael Niedermayer's avatar
Michael Niedermayer committed
52 53
#define FIXHR(a) ((int)((a) * (1LL<<32) + 0.5))

54 55
/****************/

Fabrice Bellard's avatar
Fabrice Bellard committed
56 57
#define HEADER_SIZE 4

58 59
/* layer 3 "granule" */
typedef struct GranuleDef {
60
    uint8_t scfsi;
61 62 63 64
    int part2_3_length;
    int big_values;
    int global_gain;
    int scalefac_compress;
65 66
    uint8_t block_type;
    uint8_t switch_point;
67 68
    int table_select[3];
    int subblock_gain[3];
69 70
    uint8_t scalefac_scale;
    uint8_t count1table_select;
71 72 73
    int region_size[3]; /* number of huffman codes in each region */
    int preflag;
    int short_start, long_end; /* long/short band indexes */
74 75
    uint8_t scale_factors[40];
    int32_t sb_hybrid[SBLIMIT * 18]; /* 576 samples */
76
} GranuleDef;
Fabrice Bellard's avatar
Fabrice Bellard committed
77

78
#include "mpegaudiodata.h"
79 80
#include "mpegaudiodectab.h"

81 82 83
static void compute_antialias_integer(MPADecodeContext *s, GranuleDef *g);
static void compute_antialias_float(MPADecodeContext *s, GranuleDef *g);

84
/* vlc structure for decoding layer 3 huffman tables */
85
static VLC huff_vlc[16];
86 87 88 89 90 91 92 93
static VLC_TYPE huff_vlc_tables[
  0+128+128+128+130+128+154+166+
  142+204+190+170+542+460+662+414
  ][2];
static const int huff_vlc_tables_sizes[16] = {
  0, 128, 128, 128, 130, 128, 154, 166,
  142, 204, 190, 170, 542, 460, 662, 414
};
94
static VLC huff_quad_vlc[2];
95 96 97 98
static VLC_TYPE huff_quad_vlc_tables[128+16][2];
static const int huff_quad_vlc_tables_sizes[2] = {
  128, 16
};
99
/* computed from band_size_long */
100
static uint16_t band_index_long[9][23];
101
/* XXX: free when all decoders are closed */
102
#define TABLE_4_3_SIZE (8191 + 16)*4
103 104
static int8_t  table_4_3_exp[TABLE_4_3_SIZE];
static uint32_t table_4_3_value[TABLE_4_3_SIZE];
105
static uint32_t exp_table[512];
106
static uint32_t expval_table[512][16];
107
/* intensity stereo coef table */
108 109
static int32_t is_table[2][16];
static int32_t is_table_lsf[2][2][16];
110 111
static int32_t csa_table[8][4];
static float csa_table_float[8][4];
112
static int32_t mdct_win[8][36];
113 114

/* lower 2 bits: modulo 3, higher bits: shift */
115
static uint16_t scale_factor_modshift[64];
116
/* [i][j]:  2^(-j/3) * FRAC_ONE * 2^(i+2) / (2^(i+2) - 1) */
117
static int32_t scale_factor_mult[15][3];
118 119 120 121 122
/* mult table for layer 2 group quantization */

#define SCALE_GEN(v) \
{ FIXR(1.0 * (v)), FIXR(0.7937005259 * (v)), FIXR(0.6299605249 * (v)) }

Michael Niedermayer's avatar
Michael Niedermayer committed
123
static const int32_t scale_factor_mult2[3][3] = {
124 125 126
    SCALE_GEN(4.0 / 3.0), /* 3 steps */
    SCALE_GEN(4.0 / 5.0), /* 5 steps */
    SCALE_GEN(4.0 / 9.0), /* 9 steps */
127 128
};

129
static DECLARE_ALIGNED_16(MPA_INT, window[512]);
130

131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192
/**
 * Convert region offsets to region sizes and truncate
 * size to big_values.
 */
void ff_region_offset2size(GranuleDef *g){
    int i, k, j=0;
    g->region_size[2] = (576 / 2);
    for(i=0;i<3;i++) {
        k = FFMIN(g->region_size[i], g->big_values);
        g->region_size[i] = k - j;
        j = k;
    }
}

void ff_init_short_region(MPADecodeContext *s, GranuleDef *g){
    if (g->block_type == 2)
        g->region_size[0] = (36 / 2);
    else {
        if (s->sample_rate_index <= 2)
            g->region_size[0] = (36 / 2);
        else if (s->sample_rate_index != 8)
            g->region_size[0] = (54 / 2);
        else
            g->region_size[0] = (108 / 2);
    }
    g->region_size[1] = (576 / 2);
}

void ff_init_long_region(MPADecodeContext *s, GranuleDef *g, int ra1, int ra2){
    int l;
    g->region_size[0] =
        band_index_long[s->sample_rate_index][ra1 + 1] >> 1;
    /* should not overflow */
    l = FFMIN(ra1 + ra2 + 2, 22);
    g->region_size[1] =
        band_index_long[s->sample_rate_index][l] >> 1;
}

void ff_compute_band_indexes(MPADecodeContext *s, GranuleDef *g){
    if (g->block_type == 2) {
        if (g->switch_point) {
            /* if switched mode, we handle the 36 first samples as
                long blocks.  For 8000Hz, we handle the 48 first
                exponents as long blocks (XXX: check this!) */
            if (s->sample_rate_index <= 2)
                g->long_end = 8;
            else if (s->sample_rate_index != 8)
                g->long_end = 6;
            else
                g->long_end = 4; /* 8000 Hz */

            g->short_start = 2 + (s->sample_rate_index != 8);
        } else {
            g->long_end = 0;
            g->short_start = 0;
        }
    } else {
        g->short_start = 13;
        g->long_end = 22;
    }
}

193 194 195 196 197
/* layer 1 unscaling */
/* n = number of bits of the mantissa minus 1 */
static inline int l1_unscale(int n, int mant, int scale_factor)
{
    int shift, mod;
198
    int64_t val;
199 200 201 202 203 204

    shift = scale_factor_modshift[scale_factor];
    mod = shift & 3;
    shift >>= 2;
    val = MUL64(mant + (-1 << n) + 1, scale_factor_mult[n-1][mod]);
    shift += n;
205 206
    /* NOTE: at this point, 1 <= shift >= 21 + 15 */
    return (int)((val + (1LL << (shift - 1))) >> shift);
207 208 209 210 211 212 213 214 215
}

static inline int l2_unscale_group(int steps, int mant, int scale_factor)
{
    int shift, mod, val;

    shift = scale_factor_modshift[scale_factor];
    mod = shift & 3;
    shift >>= 2;
216 217 218 219 220 221

    val = (mant - (steps >> 1)) * scale_factor_mult2[steps >> 2][mod];
    /* NOTE: at this point, 0 <= shift <= 21 */
    if (shift > 0)
        val = (val + (1 << (shift - 1))) >> shift;
    return val;
222 223 224 225 226 227 228 229
}

/* compute value^(4/3) * 2^(exponent/4). It normalized to FRAC_BITS */
static inline int l3_unscale(int value, int exponent)
{
    unsigned int m;
    int e;

230 231 232 233
    e = table_4_3_exp  [4*value + (exponent&3)];
    m = table_4_3_value[4*value + (exponent&3)];
    e -= (exponent >> 2);
    assert(e>=1);
234
    if (e > 31)
235
        return 0;
236
    m = (m + (1 << (e-1))) >> e;
237

238 239 240
    return m;
}

241 242 243 244 245 246
/* all integer n^(4/3) computation code */
#define DEV_ORDER 13

#define POW_FRAC_BITS 24
#define POW_FRAC_ONE    (1 << POW_FRAC_BITS)
#define POW_FIX(a)   ((int)((a) * POW_FRAC_ONE))
247
#define POW_MULL(a,b) (((int64_t)(a) * (int64_t)(b)) >> POW_FRAC_BITS)
248 249 250

static int dev_4_3_coefs[DEV_ORDER];

251
#if 0 /* unused */
252 253 254 255 256
static int pow_mult3[3] = {
    POW_FIX(1.0),
    POW_FIX(1.25992104989487316476),
    POW_FIX(1.58740105196819947474),
};
257
#endif
258 259 260 261 262 263 264 265 266 267 268 269

static void int_pow_init(void)
{
    int i, a;

    a = POW_FIX(1.0);
    for(i=0;i<DEV_ORDER;i++) {
        a = POW_MULL(a, POW_FIX(4.0 / 3.0) - i * POW_FIX(1.0)) / (i + 1);
        dev_4_3_coefs[i] = a;
    }
}

270
#if 0 /* unused, remove? */
271 272 273 274 275
/* return the mantissa and the binary exponent */
static int int_pow(int i, int *exp_ptr)
{
    int e, er, eq, j;
    int a, a1;
276

277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302
    /* renormalize */
    a = i;
    e = POW_FRAC_BITS;
    while (a < (1 << (POW_FRAC_BITS - 1))) {
        a = a << 1;
        e--;
    }
    a -= (1 << POW_FRAC_BITS);
    a1 = 0;
    for(j = DEV_ORDER - 1; j >= 0; j--)
        a1 = POW_MULL(a, dev_4_3_coefs[j] + a1);
    a = (1 << POW_FRAC_BITS) + a1;
    /* exponent compute (exact) */
    e = e * 4;
    er = e % 3;
    eq = e / 3;
    a = POW_MULL(a, pow_mult3[er]);
    while (a >= 2 * POW_FRAC_ONE) {
        a = a >> 1;
        eq++;
    }
    /* convert to float */
    while (a < POW_FRAC_ONE) {
        a = a << 1;
        eq--;
    }
303
    /* now POW_FRAC_ONE <= a < 2 * POW_FRAC_ONE */
304
#if POW_FRAC_BITS > FRAC_BITS
305 306 307 308 309 310 311
    a = (a + (1 << (POW_FRAC_BITS - FRAC_BITS - 1))) >> (POW_FRAC_BITS - FRAC_BITS);
    /* correct overflow */
    if (a >= 2 * (1 << FRAC_BITS)) {
        a = a >> 1;
        eq++;
    }
#endif
312 313 314
    *exp_ptr = eq;
    return a;
}
315
#endif
Fabrice Bellard's avatar
Fabrice Bellard committed
316 317 318 319

static int decode_init(AVCodecContext * avctx)
{
    MPADecodeContext *s = avctx->priv_data;
320
    static int init=0;
321
    int i, j, k;
Fabrice Bellard's avatar
Fabrice Bellard committed
322

Michel Bardiaux's avatar
Michel Bardiaux committed
323 324
    s->avctx = avctx;

325
#if defined(USE_HIGHPRECISION) && defined(CONFIG_AUDIO_NONSHORT)
326 327 328
    avctx->sample_fmt= SAMPLE_FMT_S32;
#else
    avctx->sample_fmt= SAMPLE_FMT_S16;
329
#endif
330
    s->error_recognition= avctx->error_recognition;
331

Michael Niedermayer's avatar
Michael Niedermayer committed
332
    if(avctx->antialias_algo != FF_AA_FLOAT)
Michael Niedermayer's avatar
10l  
Michael Niedermayer committed
333 334 335 336
        s->compute_antialias= compute_antialias_integer;
    else
        s->compute_antialias= compute_antialias_float;

337
    if (!init && !avctx->parse_only) {
338 339
        int offset;

340 341 342 343
        /* scale factors table for layer 1/2 */
        for(i=0;i<64;i++) {
            int shift, mod;
            /* 1.0 (i = 3) is normalized to 2 ^ FRAC_BITS */
344
            shift = (i / 3);
345 346 347 348 349 350 351 352
            mod = i % 3;
            scale_factor_modshift[i] = mod | (shift << 2);
        }

        /* scale factor multiply for layer 1 */
        for(i=0;i<15;i++) {
            int n, norm;
            n = i + 2;
353
            norm = ((INT64_C(1) << n) * FRAC_ONE) / ((1 << n) - 1);
354 355 356
            scale_factor_mult[i][0] = MULL(FIXR(1.0 * 2.0), norm, FRAC_BITS);
            scale_factor_mult[i][1] = MULL(FIXR(0.7937005259 * 2.0), norm, FRAC_BITS);
            scale_factor_mult[i][2] = MULL(FIXR(0.6299605249 * 2.0), norm, FRAC_BITS);
Michel Bardiaux's avatar
Michel Bardiaux committed
357
            dprintf(avctx, "%d: norm=%x s=%x %x %x\n",
358
                    i, norm,
359 360 361 362
                    scale_factor_mult[i][0],
                    scale_factor_mult[i][1],
                    scale_factor_mult[i][2]);
        }
363

364
        ff_mpa_synth_init(window);
365

366
        /* huffman decode tables */
367
        offset = 0;
368 369
        for(i=1;i<16;i++) {
            const HuffTable *h = &mpa_huff_tables[i];
370
            int xsize, x, y;
371
            unsigned int n;
Michael Niedermayer's avatar
Michael Niedermayer committed
372 373
            uint8_t  tmp_bits [512];
            uint16_t tmp_codes[512];
Michael Niedermayer's avatar
Michael Niedermayer committed
374 375 376

            memset(tmp_bits , 0, sizeof(tmp_bits ));
            memset(tmp_codes, 0, sizeof(tmp_codes));
377 378 379

            xsize = h->xsize;
            n = xsize * xsize;
380

381 382
            j = 0;
            for(x=0;x<xsize;x++) {
Michael Niedermayer's avatar
Michael Niedermayer committed
383
                for(y=0;y<xsize;y++){
Michael Niedermayer's avatar
Michael Niedermayer committed
384 385
                    tmp_bits [(x << 5) | y | ((x&&y)<<4)]= h->bits [j  ];
                    tmp_codes[(x << 5) | y | ((x&&y)<<4)]= h->codes[j++];
Michael Niedermayer's avatar
Michael Niedermayer committed
386
                }
387
            }
Michael Niedermayer's avatar
Michael Niedermayer committed
388 389

            /* XXX: fail test */
390 391
            huff_vlc[i].table = huff_vlc_tables+offset;
            huff_vlc[i].table_allocated = huff_vlc_tables_sizes[i];
Michael Niedermayer's avatar
Michael Niedermayer committed
392
            init_vlc(&huff_vlc[i], 7, 512,
393 394 395
                     tmp_bits, 1, 1, tmp_codes, 2, 2,
                     INIT_VLC_USE_NEW_STATIC);
            offset += huff_vlc_tables_sizes[i];
396
        }
397
        assert(offset == FF_ARRAY_ELEMS(huff_vlc_tables));
398 399

        offset = 0;
400
        for(i=0;i<2;i++) {
401 402
            huff_quad_vlc[i].table = huff_quad_vlc_tables+offset;
            huff_quad_vlc[i].table_allocated = huff_quad_vlc_tables_sizes[i];
403
            init_vlc(&huff_quad_vlc[i], i == 0 ? 7 : 4, 16,
404 405 406
                     mpa_quad_bits[i], 1, 1, mpa_quad_codes[i], 1, 1,
                     INIT_VLC_USE_NEW_STATIC);
            offset += huff_quad_vlc_tables_sizes[i];
407
        }
408
        assert(offset == FF_ARRAY_ELEMS(huff_quad_vlc_tables));
409 410 411 412 413 414 415 416 417 418

        for(i=0;i<9;i++) {
            k = 0;
            for(j=0;j<22;j++) {
                band_index_long[i][j] = k;
                k += band_size_long[i][j];
            }
            band_index_long[i][22] = k;
        }

419
        /* compute n ^ (4/3) and store it in mantissa/exp format */
420

421
        int_pow_init();
422
        for(i=1;i<TABLE_4_3_SIZE;i++) {
423
            double f, fm;
424
            int e, m;
425 426
            f = pow((double)(i/4), 4.0 / 3.0) * pow(2, (i&3)*0.25);
            fm = frexp(f, &e);
427
            m = (uint32_t)(fm*(1LL<<31) + 0.5);
428
            e+= FRAC_BITS - 31 + 5 - 100;
429

430 431
            /* normalized to FRAC_BITS */
            table_4_3_value[i] = m;
432
            table_4_3_exp[i] = -e;
433
        }
434
        for(i=0; i<512*16; i++){
435 436
            int exponent= (i>>4);
            double f= pow(i&15, 4.0 / 3.0) * pow(2, (exponent-400)*0.25 + FRAC_BITS + 5);
437
            expval_table[exponent][i&15]= llrint(f);
438
            if((i&15)==1)
439
                exp_table[exponent]= llrint(f);
440
        }
441

442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467
        for(i=0;i<7;i++) {
            float f;
            int v;
            if (i != 6) {
                f = tan((double)i * M_PI / 12.0);
                v = FIXR(f / (1.0 + f));
            } else {
                v = FIXR(1.0);
            }
            is_table[0][i] = v;
            is_table[1][6 - i] = v;
        }
        /* invalid values */
        for(i=7;i<16;i++)
            is_table[0][i] = is_table[1][i] = 0.0;

        for(i=0;i<16;i++) {
            double f;
            int e, k;

            for(j=0;j<2;j++) {
                e = -(j + 1) * ((i + 1) >> 1);
                f = pow(2.0, e / 4.0);
                k = i & 1;
                is_table_lsf[j][k ^ 1][i] = FIXR(f);
                is_table_lsf[j][k][i] = FIXR(1.0);
Michel Bardiaux's avatar
Michel Bardiaux committed
468
                dprintf(avctx, "is_table_lsf %d %d: %x %x\n",
469 470 471 472 473 474 475 476 477
                        i, j, is_table_lsf[j][0][i], is_table_lsf[j][1][i]);
            }
        }

        for(i=0;i<8;i++) {
            float ci, cs, ca;
            ci = ci_table[i];
            cs = 1.0 / sqrt(1.0 + ci * ci);
            ca = cs * ci;
Michael Niedermayer's avatar
Michael Niedermayer committed
478 479 480
            csa_table[i][0] = FIXHR(cs/4);
            csa_table[i][1] = FIXHR(ca/4);
            csa_table[i][2] = FIXHR(ca/4) + FIXHR(cs/4);
481
            csa_table[i][3] = FIXHR(ca/4) - FIXHR(cs/4);
482 483 484
            csa_table_float[i][0] = cs;
            csa_table_float[i][1] = ca;
            csa_table_float[i][2] = ca + cs;
485
            csa_table_float[i][3] = ca - cs;
486 487 488 489
        }

        /* compute mdct windows */
        for(i=0;i<36;i++) {
Michael Niedermayer's avatar
Michael Niedermayer committed
490 491
            for(j=0; j<4; j++){
                double d;
492

Michael Niedermayer's avatar
Michael Niedermayer committed
493 494
                if(j==2 && i%3 != 1)
                    continue;
495

Michael Niedermayer's avatar
Michael Niedermayer committed
496 497 498 499 500 501 502 503 504 505 506
                d= sin(M_PI * (i + 0.5) / 36.0);
                if(j==1){
                    if     (i>=30) d= 0;
                    else if(i>=24) d= sin(M_PI * (i - 18 + 0.5) / 12.0);
                    else if(i>=18) d= 1;
                }else if(j==3){
                    if     (i<  6) d= 0;
                    else if(i< 12) d= sin(M_PI * (i -  6 + 0.5) / 12.0);
                    else if(i< 18) d= 1;
                }
                //merge last stage of imdct into the window coefficients
Michael Niedermayer's avatar
Michael Niedermayer committed
507 508 509 510 511 512
                d*= 0.5 / cos(M_PI*(2*i + 19)/72);

                if(j==2)
                    mdct_win[j][i/3] = FIXHR((d / (1<<5)));
                else
                    mdct_win[j][i  ] = FIXHR((d / (1<<5)));
Michael Niedermayer's avatar
Michael Niedermayer committed
513
            }
514 515 516 517 518 519 520 521 522 523 524
        }

        /* NOTE: we do frequency inversion adter the MDCT by changing
           the sign of the right window coefs */
        for(j=0;j<4;j++) {
            for(i=0;i<36;i+=2) {
                mdct_win[j + 4][i] = mdct_win[j][i];
                mdct_win[j + 4][i + 1] = -mdct_win[j][i + 1];
            }
        }

Fabrice Bellard's avatar
Fabrice Bellard committed
525 526 527
        init = 1;
    }

Roberto Togni's avatar
Roberto Togni committed
528 529
    if (avctx->codec_id == CODEC_ID_MP3ADU)
        s->adu_mode = 1;
Fabrice Bellard's avatar
Fabrice Bellard committed
530 531 532
    return 0;
}

533
/* tab[i][j] = 1.0 / (2.0 * cos(pi*(2*k+1) / 2^(6 - j))) */
534 535 536

/* cos(i*pi/64) */

537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571
#define COS0_0  FIXHR(0.50060299823519630134/2)
#define COS0_1  FIXHR(0.50547095989754365998/2)
#define COS0_2  FIXHR(0.51544730992262454697/2)
#define COS0_3  FIXHR(0.53104259108978417447/2)
#define COS0_4  FIXHR(0.55310389603444452782/2)
#define COS0_5  FIXHR(0.58293496820613387367/2)
#define COS0_6  FIXHR(0.62250412303566481615/2)
#define COS0_7  FIXHR(0.67480834145500574602/2)
#define COS0_8  FIXHR(0.74453627100229844977/2)
#define COS0_9  FIXHR(0.83934964541552703873/2)
#define COS0_10 FIXHR(0.97256823786196069369/2)
#define COS0_11 FIXHR(1.16943993343288495515/4)
#define COS0_12 FIXHR(1.48416461631416627724/4)
#define COS0_13 FIXHR(2.05778100995341155085/8)
#define COS0_14 FIXHR(3.40760841846871878570/8)
#define COS0_15 FIXHR(10.19000812354805681150/32)

#define COS1_0 FIXHR(0.50241928618815570551/2)
#define COS1_1 FIXHR(0.52249861493968888062/2)
#define COS1_2 FIXHR(0.56694403481635770368/2)
#define COS1_3 FIXHR(0.64682178335999012954/2)
#define COS1_4 FIXHR(0.78815462345125022473/2)
#define COS1_5 FIXHR(1.06067768599034747134/4)
#define COS1_6 FIXHR(1.72244709823833392782/4)
#define COS1_7 FIXHR(5.10114861868916385802/16)

#define COS2_0 FIXHR(0.50979557910415916894/2)
#define COS2_1 FIXHR(0.60134488693504528054/2)
#define COS2_2 FIXHR(0.89997622313641570463/2)
#define COS2_3 FIXHR(2.56291544774150617881/8)

#define COS3_0 FIXHR(0.54119610014619698439/2)
#define COS3_1 FIXHR(1.30656296487637652785/4)

#define COS4_0 FIXHR(0.70710678118654752439/2)
572 573

/* butterfly operator */
574
#define BF(a, b, c, s)\
575 576 577 578
{\
    tmp0 = tab[a] + tab[b];\
    tmp1 = tab[a] - tab[b];\
    tab[a] = tmp0;\
579
    tab[b] = MULH(tmp1<<(s), c);\
580 581 582 583
}

#define BF1(a, b, c, d)\
{\
584 585
    BF(a, b, COS4_0, 1);\
    BF(c, d,-COS4_0, 1);\
586 587 588 589 590
    tab[c] += tab[d];\
}

#define BF2(a, b, c, d)\
{\
591 592
    BF(a, b, COS4_0, 1);\
    BF(c, d,-COS4_0, 1);\
593 594 595 596 597 598 599 600 601
    tab[c] += tab[d];\
    tab[a] += tab[c];\
    tab[c] += tab[b];\
    tab[b] += tab[d];\
}

#define ADD(a, b) tab[a] += tab[b]

/* DCT32 without 1/sqrt(2) coef zero scaling. */
602
static void dct32(int32_t *out, int32_t *tab)
603 604 605 606
{
    int tmp0, tmp1;

    /* pass 1 */
607 608
    BF( 0, 31, COS0_0 , 1);
    BF(15, 16, COS0_15, 5);
609
    /* pass 2 */
610 611
    BF( 0, 15, COS1_0 , 1);
    BF(16, 31,-COS1_0 , 1);
612
    /* pass 1 */
613 614
    BF( 7, 24, COS0_7 , 1);
    BF( 8, 23, COS0_8 , 1);
615
    /* pass 2 */
616 617
    BF( 7,  8, COS1_7 , 4);
    BF(23, 24,-COS1_7 , 4);
618
    /* pass 3 */
619 620 621 622
    BF( 0,  7, COS2_0 , 1);
    BF( 8, 15,-COS2_0 , 1);
    BF(16, 23, COS2_0 , 1);
    BF(24, 31,-COS2_0 , 1);
623
    /* pass 1 */
624 625
    BF( 3, 28, COS0_3 , 1);
    BF(12, 19, COS0_12, 2);
626
    /* pass 2 */
627 628
    BF( 3, 12, COS1_3 , 1);
    BF(19, 28,-COS1_3 , 1);
629
    /* pass 1 */
630 631
    BF( 4, 27, COS0_4 , 1);
    BF(11, 20, COS0_11, 2);
632
    /* pass 2 */
633 634
    BF( 4, 11, COS1_4 , 1);
    BF(20, 27,-COS1_4 , 1);
635
    /* pass 3 */
636 637 638 639
    BF( 3,  4, COS2_3 , 3);
    BF(11, 12,-COS2_3 , 3);
    BF(19, 20, COS2_3 , 3);
    BF(27, 28,-COS2_3 , 3);
640
    /* pass 4 */
641 642 643 644 645 646 647 648
    BF( 0,  3, COS3_0 , 1);
    BF( 4,  7,-COS3_0 , 1);
    BF( 8, 11, COS3_0 , 1);
    BF(12, 15,-COS3_0 , 1);
    BF(16, 19, COS3_0 , 1);
    BF(20, 23,-COS3_0 , 1);
    BF(24, 27, COS3_0 , 1);
    BF(28, 31,-COS3_0 , 1);
649

650 651 652


    /* pass 1 */
653 654
    BF( 1, 30, COS0_1 , 1);
    BF(14, 17, COS0_14, 3);
655
    /* pass 2 */
656 657
    BF( 1, 14, COS1_1 , 1);
    BF(17, 30,-COS1_1 , 1);
658
    /* pass 1 */
659 660
    BF( 6, 25, COS0_6 , 1);
    BF( 9, 22, COS0_9 , 1);
661
    /* pass 2 */
662 663
    BF( 6,  9, COS1_6 , 2);
    BF(22, 25,-COS1_6 , 2);
664
    /* pass 3 */
665 666 667 668
    BF( 1,  6, COS2_1 , 1);
    BF( 9, 14,-COS2_1 , 1);
    BF(17, 22, COS2_1 , 1);
    BF(25, 30,-COS2_1 , 1);
669

670
    /* pass 1 */
671 672
    BF( 2, 29, COS0_2 , 1);
    BF(13, 18, COS0_13, 3);
673
    /* pass 2 */
674 675
    BF( 2, 13, COS1_2 , 1);
    BF(18, 29,-COS1_2 , 1);
676
    /* pass 1 */
677 678
    BF( 5, 26, COS0_5 , 1);
    BF(10, 21, COS0_10, 1);
679
    /* pass 2 */
680 681
    BF( 5, 10, COS1_5 , 2);
    BF(21, 26,-COS1_5 , 2);
682
    /* pass 3 */
683 684 685 686
    BF( 2,  5, COS2_2 , 1);
    BF(10, 13,-COS2_2 , 1);
    BF(18, 21, COS2_2 , 1);
    BF(26, 29,-COS2_2 , 1);
687
    /* pass 4 */
688 689 690 691 692 693 694 695
    BF( 1,  2, COS3_1 , 2);
    BF( 5,  6,-COS3_1 , 2);
    BF( 9, 10, COS3_1 , 2);
    BF(13, 14,-COS3_1 , 2);
    BF(17, 18, COS3_1 , 2);
    BF(21, 22,-COS3_1 , 2);
    BF(25, 26, COS3_1 , 2);
    BF(29, 30,-COS3_1 , 2);
696

697
    /* pass 5 */
698 699 700
    BF1( 0,  1,  2,  3);
    BF2( 4,  5,  6,  7);
    BF1( 8,  9, 10, 11);
701 702 703 704 705
    BF2(12, 13, 14, 15);
    BF1(16, 17, 18, 19);
    BF2(20, 21, 22, 23);
    BF1(24, 25, 26, 27);
    BF2(28, 29, 30, 31);
706

707
    /* pass 6 */
708

709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732
    ADD( 8, 12);
    ADD(12, 10);
    ADD(10, 14);
    ADD(14,  9);
    ADD( 9, 13);
    ADD(13, 11);
    ADD(11, 15);

    out[ 0] = tab[0];
    out[16] = tab[1];
    out[ 8] = tab[2];
    out[24] = tab[3];
    out[ 4] = tab[4];
    out[20] = tab[5];
    out[12] = tab[6];
    out[28] = tab[7];
    out[ 2] = tab[8];
    out[18] = tab[9];
    out[10] = tab[10];
    out[26] = tab[11];
    out[ 6] = tab[12];
    out[22] = tab[13];
    out[14] = tab[14];
    out[30] = tab[15];
733

734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761
    ADD(24, 28);
    ADD(28, 26);
    ADD(26, 30);
    ADD(30, 25);
    ADD(25, 29);
    ADD(29, 27);
    ADD(27, 31);

    out[ 1] = tab[16] + tab[24];
    out[17] = tab[17] + tab[25];
    out[ 9] = tab[18] + tab[26];
    out[25] = tab[19] + tab[27];
    out[ 5] = tab[20] + tab[28];
    out[21] = tab[21] + tab[29];
    out[13] = tab[22] + tab[30];
    out[29] = tab[23] + tab[31];
    out[ 3] = tab[24] + tab[20];
    out[19] = tab[25] + tab[21];
    out[11] = tab[26] + tab[22];
    out[27] = tab[27] + tab[23];
    out[ 7] = tab[28] + tab[18];
    out[23] = tab[29] + tab[19];
    out[15] = tab[30] + tab[17];
    out[31] = tab[31];
}

#if FRAC_BITS <= 15

762
static inline int round_sample(int *sum)
763 764
{
    int sum1;
765 766
    sum1 = (*sum) >> OUT_SHIFT;
    *sum &= (1<<OUT_SHIFT)-1;
767 768 769 770
    if (sum1 < OUT_MIN)
        sum1 = OUT_MIN;
    else if (sum1 > OUT_MAX)
        sum1 = OUT_MAX;
771
    return sum1;
772 773
}

Luca Barbato's avatar
Luca Barbato committed
774 775
/* signed 16x16 -> 32 multiply add accumulate */
#define MACS(rt, ra, rb) MAC16(rt, ra, rb)
Siarhei Siamashka's avatar
Siarhei Siamashka committed
776

Luca Barbato's avatar
Luca Barbato committed
777 778
/* signed 16x16 -> 32 multiply */
#define MULS(ra, rb) MUL16(ra, rb)
779

780 781
#define MLSS(rt, ra, rb) MLS16(rt, ra, rb)

782 783
#else

784
static inline int round_sample(int64_t *sum)
785 786
{
    int sum1;
787 788
    sum1 = (int)((*sum) >> OUT_SHIFT);
    *sum &= (1<<OUT_SHIFT)-1;
789 790 791 792
    if (sum1 < OUT_MIN)
        sum1 = OUT_MIN;
    else if (sum1 > OUT_MAX)
        sum1 = OUT_MAX;
793
    return sum1;
794 795
}

796
#   define MULS(ra, rb) MUL64(ra, rb)
797 798
#   define MACS(rt, ra, rb) MAC64(rt, ra, rb)
#   define MLSS(rt, ra, rb) MLS64(rt, ra, rb)
799 800
#endif

801 802 803 804 805 806 807 808 809 810
#define SUM8(op, sum, w, p)               \
{                                         \
    op(sum, (w)[0 * 64], p[0 * 64]);      \
    op(sum, (w)[1 * 64], p[1 * 64]);      \
    op(sum, (w)[2 * 64], p[2 * 64]);      \
    op(sum, (w)[3 * 64], p[3 * 64]);      \
    op(sum, (w)[4 * 64], p[4 * 64]);      \
    op(sum, (w)[5 * 64], p[5 * 64]);      \
    op(sum, (w)[6 * 64], p[6 * 64]);      \
    op(sum, (w)[7 * 64], p[7 * 64]);      \
811 812 813 814 815 816
}

#define SUM8P2(sum1, op1, sum2, op2, w1, w2, p) \
{                                               \
    int tmp;\
    tmp = p[0 * 64];\
817 818
    op1(sum1, (w1)[0 * 64], tmp);\
    op2(sum2, (w2)[0 * 64], tmp);\
819
    tmp = p[1 * 64];\
820 821
    op1(sum1, (w1)[1 * 64], tmp);\
    op2(sum2, (w2)[1 * 64], tmp);\
822
    tmp = p[2 * 64];\
823 824
    op1(sum1, (w1)[2 * 64], tmp);\
    op2(sum2, (w2)[2 * 64], tmp);\
825
    tmp = p[3 * 64];\
826 827
    op1(sum1, (w1)[3 * 64], tmp);\
    op2(sum2, (w2)[3 * 64], tmp);\
828
    tmp = p[4 * 64];\
829 830
    op1(sum1, (w1)[4 * 64], tmp);\
    op2(sum2, (w2)[4 * 64], tmp);\
831
    tmp = p[5 * 64];\
832 833
    op1(sum1, (w1)[5 * 64], tmp);\
    op2(sum2, (w2)[5 * 64], tmp);\
834
    tmp = p[6 * 64];\
835 836
    op1(sum1, (w1)[6 * 64], tmp);\
    op2(sum2, (w2)[6 * 64], tmp);\
837
    tmp = p[7 * 64];\
838 839
    op1(sum1, (w1)[7 * 64], tmp);\
    op2(sum2, (w2)[7 * 64], tmp);\
840 841
}

842 843 844 845 846 847 848
void ff_mpa_synth_init(MPA_INT *window)
{
    int i;

    /* max = 18760, max sum over all 16 coefs : 44736 */
    for(i=0;i<257;i++) {
        int v;
849
        v = ff_mpa_enwindow[i];
850 851 852 853 854 855 856 857
#if WFRAC_BITS < 16
        v = (v + (1 << (16 - WFRAC_BITS - 1))) >> (16 - WFRAC_BITS);
#endif
        window[i] = v;
        if ((i & 63) != 0)
            v = -v;
        if (i != 0)
            window[512 - i] = v;
858
    }
859
}
860 861 862 863

/* 32 sub band synthesis filter. Input: 32 sub band samples, Output:
   32 samples. */
/* XXX: optimize by avoiding ring buffer usage */
864
void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
865
                         MPA_INT *window, int *dither_state,
866
                         OUT_INT *samples, int incr,
867
                         int32_t sb_samples[SBLIMIT])
868
{
869
    int32_t tmp[32];
870
    register MPA_INT *synth_buf;
Alex Beregszaszi's avatar
Alex Beregszaszi committed
871
    register const MPA_INT *w, *w2, *p;
872
    int j, offset, v;
873
    OUT_INT *samples2;
874
#if FRAC_BITS <= 15
875
    int sum, sum2;
876
#else
877
    int64_t sum, sum2;
878
#endif
879

880
    dct32(tmp, sb_samples);
881

882 883
    offset = *synth_buf_offset;
    synth_buf = synth_buf_ptr + offset;
884 885 886 887

    for(j=0;j<32;j++) {
        v = tmp[j];
#if FRAC_BITS <= 15
888 889
        /* NOTE: can cause a loss in precision if very high amplitude
           sound */
890
        v = av_clip_int16(v);
891 892 893 894 895 896
#endif
        synth_buf[j] = v;
    }
    /* copy to avoid wrap */
    memcpy(synth_buf + 512, synth_buf, 32 * sizeof(MPA_INT));

897
    samples2 = samples + 31 * incr;
898
    w = window;
899 900
    w2 = window + 31;

901
    sum = *dither_state;
902
    p = synth_buf + 16;
903
    SUM8(MACS, sum, w, p);
904
    p = synth_buf + 48;
905
    SUM8(MLSS, sum, w + 32, p);