mpegaudiodec.c 78 KB
Newer Older
Fabrice Bellard's avatar
Fabrice Bellard committed
1 2
/*
 * MPEG Audio decoder
Fabrice Bellard's avatar
Fabrice Bellard committed
3
 * Copyright (c) 2001, 2002 Fabrice Bellard.
Fabrice Bellard's avatar
Fabrice Bellard committed
4
 *
5 6 7
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
Fabrice Bellard's avatar
Fabrice Bellard committed
8 9
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
Fabrice Bellard's avatar
Fabrice Bellard committed
11
 *
12
 * FFmpeg is distributed in the hope that it will be useful,
Fabrice Bellard's avatar
Fabrice Bellard committed
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Fabrice Bellard's avatar
Fabrice Bellard committed
14 15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
Fabrice Bellard's avatar
Fabrice Bellard committed
16
 *
Fabrice Bellard's avatar
Fabrice Bellard committed
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with FFmpeg; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Fabrice Bellard's avatar
Fabrice Bellard committed
20
 */
Michael Niedermayer's avatar
Michael Niedermayer committed
21 22 23 24

/**
 * @file mpegaudiodec.c
 * MPEG Audio decoder.
25
 */
Michael Niedermayer's avatar
Michael Niedermayer committed
26

Fabrice Bellard's avatar
Fabrice Bellard committed
27
#include "avcodec.h"
28
#include "bitstream.h"
29
#include "dsputil.h"
Fabrice Bellard's avatar
Fabrice Bellard committed
30 31

/*
32 33 34
 * TODO:
 *  - in low precision mode, use more 16 bit multiplies in synth filter
 *  - test lsf / mpeg25 extensively.
Fabrice Bellard's avatar
Fabrice Bellard committed
35 36
 */

Roberto Togni's avatar
Roberto Togni committed
37
#include "mpegaudio.h"
38
#include "mpegaudiodecheader.h"
39

Luca Barbato's avatar
Luca Barbato committed
40 41
#include "mathops.h"

42 43 44 45
/* WARNING: only correct for posititive numbers */
#define FIXR(a)   ((int)((a) * FRAC_ONE + 0.5))
#define FRAC_RND(a) (((a) + (FRAC_ONE/2)) >> FRAC_BITS)

Michael Niedermayer's avatar
Michael Niedermayer committed
46 47
#define FIXHR(a) ((int)((a) * (1LL<<32) + 0.5))

48 49
/****************/

Fabrice Bellard's avatar
Fabrice Bellard committed
50 51
#define HEADER_SIZE 4

52 53
/* layer 3 "granule" */
typedef struct GranuleDef {
54
    uint8_t scfsi;
55 56 57 58
    int part2_3_length;
    int big_values;
    int global_gain;
    int scalefac_compress;
59 60
    uint8_t block_type;
    uint8_t switch_point;
61 62
    int table_select[3];
    int subblock_gain[3];
63 64
    uint8_t scalefac_scale;
    uint8_t count1table_select;
65 66 67
    int region_size[3]; /* number of huffman codes in each region */
    int preflag;
    int short_start, long_end; /* long/short band indexes */
68 69
    uint8_t scale_factors[40];
    int32_t sb_hybrid[SBLIMIT * 18]; /* 576 samples */
70
} GranuleDef;
Fabrice Bellard's avatar
Fabrice Bellard committed
71

72
#include "mpegaudiodata.h"
73 74
#include "mpegaudiodectab.h"

75 76 77
static void compute_antialias_integer(MPADecodeContext *s, GranuleDef *g);
static void compute_antialias_float(MPADecodeContext *s, GranuleDef *g);

78
/* vlc structure for decoding layer 3 huffman tables */
79
static VLC huff_vlc[16];
80 81 82 83 84 85 86 87
static VLC_TYPE huff_vlc_tables[
  0+128+128+128+130+128+154+166+
  142+204+190+170+542+460+662+414
  ][2];
static const int huff_vlc_tables_sizes[16] = {
  0, 128, 128, 128, 130, 128, 154, 166,
  142, 204, 190, 170, 542, 460, 662, 414
};
88
static VLC huff_quad_vlc[2];
89 90 91 92
static VLC_TYPE huff_quad_vlc_tables[128+16][2];
static const int huff_quad_vlc_tables_sizes[2] = {
  128, 16
};
93
/* computed from band_size_long */
94
static uint16_t band_index_long[9][23];
95
/* XXX: free when all decoders are closed */
96
#define TABLE_4_3_SIZE (8191 + 16)*4
97 98
static int8_t  table_4_3_exp[TABLE_4_3_SIZE];
static uint32_t table_4_3_value[TABLE_4_3_SIZE];
99
static uint32_t exp_table[512];
100
static uint32_t expval_table[512][16];
101
/* intensity stereo coef table */
102 103
static int32_t is_table[2][16];
static int32_t is_table_lsf[2][2][16];
104 105
static int32_t csa_table[8][4];
static float csa_table_float[8][4];
106
static int32_t mdct_win[8][36];
107 108

/* lower 2 bits: modulo 3, higher bits: shift */
109
static uint16_t scale_factor_modshift[64];
110
/* [i][j]:  2^(-j/3) * FRAC_ONE * 2^(i+2) / (2^(i+2) - 1) */
111
static int32_t scale_factor_mult[15][3];
112 113 114 115 116
/* mult table for layer 2 group quantization */

#define SCALE_GEN(v) \
{ FIXR(1.0 * (v)), FIXR(0.7937005259 * (v)), FIXR(0.6299605249 * (v)) }

Michael Niedermayer's avatar
Michael Niedermayer committed
117
static const int32_t scale_factor_mult2[3][3] = {
118 119 120
    SCALE_GEN(4.0 / 3.0), /* 3 steps */
    SCALE_GEN(4.0 / 5.0), /* 5 steps */
    SCALE_GEN(4.0 / 9.0), /* 9 steps */
121 122
};

123
static DECLARE_ALIGNED_16(MPA_INT, window[512]);
124

125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186
/**
 * Convert region offsets to region sizes and truncate
 * size to big_values.
 */
void ff_region_offset2size(GranuleDef *g){
    int i, k, j=0;
    g->region_size[2] = (576 / 2);
    for(i=0;i<3;i++) {
        k = FFMIN(g->region_size[i], g->big_values);
        g->region_size[i] = k - j;
        j = k;
    }
}

void ff_init_short_region(MPADecodeContext *s, GranuleDef *g){
    if (g->block_type == 2)
        g->region_size[0] = (36 / 2);
    else {
        if (s->sample_rate_index <= 2)
            g->region_size[0] = (36 / 2);
        else if (s->sample_rate_index != 8)
            g->region_size[0] = (54 / 2);
        else
            g->region_size[0] = (108 / 2);
    }
    g->region_size[1] = (576 / 2);
}

void ff_init_long_region(MPADecodeContext *s, GranuleDef *g, int ra1, int ra2){
    int l;
    g->region_size[0] =
        band_index_long[s->sample_rate_index][ra1 + 1] >> 1;
    /* should not overflow */
    l = FFMIN(ra1 + ra2 + 2, 22);
    g->region_size[1] =
        band_index_long[s->sample_rate_index][l] >> 1;
}

void ff_compute_band_indexes(MPADecodeContext *s, GranuleDef *g){
    if (g->block_type == 2) {
        if (g->switch_point) {
            /* if switched mode, we handle the 36 first samples as
                long blocks.  For 8000Hz, we handle the 48 first
                exponents as long blocks (XXX: check this!) */
            if (s->sample_rate_index <= 2)
                g->long_end = 8;
            else if (s->sample_rate_index != 8)
                g->long_end = 6;
            else
                g->long_end = 4; /* 8000 Hz */

            g->short_start = 2 + (s->sample_rate_index != 8);
        } else {
            g->long_end = 0;
            g->short_start = 0;
        }
    } else {
        g->short_start = 13;
        g->long_end = 22;
    }
}

187 188 189 190 191
/* layer 1 unscaling */
/* n = number of bits of the mantissa minus 1 */
static inline int l1_unscale(int n, int mant, int scale_factor)
{
    int shift, mod;
192
    int64_t val;
193 194 195 196 197 198

    shift = scale_factor_modshift[scale_factor];
    mod = shift & 3;
    shift >>= 2;
    val = MUL64(mant + (-1 << n) + 1, scale_factor_mult[n-1][mod]);
    shift += n;
199 200
    /* NOTE: at this point, 1 <= shift >= 21 + 15 */
    return (int)((val + (1LL << (shift - 1))) >> shift);
201 202 203 204 205 206 207 208 209
}

static inline int l2_unscale_group(int steps, int mant, int scale_factor)
{
    int shift, mod, val;

    shift = scale_factor_modshift[scale_factor];
    mod = shift & 3;
    shift >>= 2;
210 211 212 213 214 215

    val = (mant - (steps >> 1)) * scale_factor_mult2[steps >> 2][mod];
    /* NOTE: at this point, 0 <= shift <= 21 */
    if (shift > 0)
        val = (val + (1 << (shift - 1))) >> shift;
    return val;
216 217 218 219 220 221 222 223
}

/* compute value^(4/3) * 2^(exponent/4). It normalized to FRAC_BITS */
static inline int l3_unscale(int value, int exponent)
{
    unsigned int m;
    int e;

224 225 226 227
    e = table_4_3_exp  [4*value + (exponent&3)];
    m = table_4_3_value[4*value + (exponent&3)];
    e -= (exponent >> 2);
    assert(e>=1);
228
    if (e > 31)
229
        return 0;
230
    m = (m + (1 << (e-1))) >> e;
231

232 233 234
    return m;
}

235 236 237 238 239 240
/* all integer n^(4/3) computation code */
#define DEV_ORDER 13

#define POW_FRAC_BITS 24
#define POW_FRAC_ONE    (1 << POW_FRAC_BITS)
#define POW_FIX(a)   ((int)((a) * POW_FRAC_ONE))
241
#define POW_MULL(a,b) (((int64_t)(a) * (int64_t)(b)) >> POW_FRAC_BITS)
242 243 244

static int dev_4_3_coefs[DEV_ORDER];

245
#if 0 /* unused */
246 247 248 249 250
static int pow_mult3[3] = {
    POW_FIX(1.0),
    POW_FIX(1.25992104989487316476),
    POW_FIX(1.58740105196819947474),
};
251
#endif
252 253 254 255 256 257 258 259 260 261 262 263

static void int_pow_init(void)
{
    int i, a;

    a = POW_FIX(1.0);
    for(i=0;i<DEV_ORDER;i++) {
        a = POW_MULL(a, POW_FIX(4.0 / 3.0) - i * POW_FIX(1.0)) / (i + 1);
        dev_4_3_coefs[i] = a;
    }
}

264
#if 0 /* unused, remove? */
265 266 267 268 269
/* return the mantissa and the binary exponent */
static int int_pow(int i, int *exp_ptr)
{
    int e, er, eq, j;
    int a, a1;
270

271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296
    /* renormalize */
    a = i;
    e = POW_FRAC_BITS;
    while (a < (1 << (POW_FRAC_BITS - 1))) {
        a = a << 1;
        e--;
    }
    a -= (1 << POW_FRAC_BITS);
    a1 = 0;
    for(j = DEV_ORDER - 1; j >= 0; j--)
        a1 = POW_MULL(a, dev_4_3_coefs[j] + a1);
    a = (1 << POW_FRAC_BITS) + a1;
    /* exponent compute (exact) */
    e = e * 4;
    er = e % 3;
    eq = e / 3;
    a = POW_MULL(a, pow_mult3[er]);
    while (a >= 2 * POW_FRAC_ONE) {
        a = a >> 1;
        eq++;
    }
    /* convert to float */
    while (a < POW_FRAC_ONE) {
        a = a << 1;
        eq--;
    }
297
    /* now POW_FRAC_ONE <= a < 2 * POW_FRAC_ONE */
298
#if POW_FRAC_BITS > FRAC_BITS
299 300 301 302 303 304 305
    a = (a + (1 << (POW_FRAC_BITS - FRAC_BITS - 1))) >> (POW_FRAC_BITS - FRAC_BITS);
    /* correct overflow */
    if (a >= 2 * (1 << FRAC_BITS)) {
        a = a >> 1;
        eq++;
    }
#endif
306 307 308
    *exp_ptr = eq;
    return a;
}
309
#endif
Fabrice Bellard's avatar
Fabrice Bellard committed
310 311 312 313

static int decode_init(AVCodecContext * avctx)
{
    MPADecodeContext *s = avctx->priv_data;
314
    static int init=0;
315
    int i, j, k;
Fabrice Bellard's avatar
Fabrice Bellard committed
316

Michel Bardiaux's avatar
Michel Bardiaux committed
317 318
    s->avctx = avctx;

319
#if CONFIG_MPEGAUDIO_HP && CONFIG_AUDIO_NONSHORT
320 321 322
    avctx->sample_fmt= SAMPLE_FMT_S32;
#else
    avctx->sample_fmt= SAMPLE_FMT_S16;
323
#endif
324
    s->error_recognition= avctx->error_recognition;
325

Michael Niedermayer's avatar
Michael Niedermayer committed
326
    if(avctx->antialias_algo != FF_AA_FLOAT)
Michael Niedermayer's avatar
10l  
Michael Niedermayer committed
327 328 329 330
        s->compute_antialias= compute_antialias_integer;
    else
        s->compute_antialias= compute_antialias_float;

331
    if (!init && !avctx->parse_only) {
332 333
        int offset;

334 335 336 337
        /* scale factors table for layer 1/2 */
        for(i=0;i<64;i++) {
            int shift, mod;
            /* 1.0 (i = 3) is normalized to 2 ^ FRAC_BITS */
338
            shift = (i / 3);
339 340 341 342 343 344 345 346
            mod = i % 3;
            scale_factor_modshift[i] = mod | (shift << 2);
        }

        /* scale factor multiply for layer 1 */
        for(i=0;i<15;i++) {
            int n, norm;
            n = i + 2;
347
            norm = ((INT64_C(1) << n) * FRAC_ONE) / ((1 << n) - 1);
348 349 350
            scale_factor_mult[i][0] = MULL(FIXR(1.0 * 2.0), norm, FRAC_BITS);
            scale_factor_mult[i][1] = MULL(FIXR(0.7937005259 * 2.0), norm, FRAC_BITS);
            scale_factor_mult[i][2] = MULL(FIXR(0.6299605249 * 2.0), norm, FRAC_BITS);
Michel Bardiaux's avatar
Michel Bardiaux committed
351
            dprintf(avctx, "%d: norm=%x s=%x %x %x\n",
352
                    i, norm,
353 354 355 356
                    scale_factor_mult[i][0],
                    scale_factor_mult[i][1],
                    scale_factor_mult[i][2]);
        }
357

358
        ff_mpa_synth_init(window);
359

360
        /* huffman decode tables */
361
        offset = 0;
362 363
        for(i=1;i<16;i++) {
            const HuffTable *h = &mpa_huff_tables[i];
364
            int xsize, x, y;
365
            unsigned int n;
Michael Niedermayer's avatar
Michael Niedermayer committed
366 367
            uint8_t  tmp_bits [512];
            uint16_t tmp_codes[512];
Michael Niedermayer's avatar
Michael Niedermayer committed
368 369 370

            memset(tmp_bits , 0, sizeof(tmp_bits ));
            memset(tmp_codes, 0, sizeof(tmp_codes));
371 372 373

            xsize = h->xsize;
            n = xsize * xsize;
374

375 376
            j = 0;
            for(x=0;x<xsize;x++) {
Michael Niedermayer's avatar
Michael Niedermayer committed
377
                for(y=0;y<xsize;y++){
Michael Niedermayer's avatar
Michael Niedermayer committed
378 379
                    tmp_bits [(x << 5) | y | ((x&&y)<<4)]= h->bits [j  ];
                    tmp_codes[(x << 5) | y | ((x&&y)<<4)]= h->codes[j++];
Michael Niedermayer's avatar
Michael Niedermayer committed
380
                }
381
            }
Michael Niedermayer's avatar
Michael Niedermayer committed
382 383

            /* XXX: fail test */
384 385
            huff_vlc[i].table = huff_vlc_tables+offset;
            huff_vlc[i].table_allocated = huff_vlc_tables_sizes[i];
Michael Niedermayer's avatar
Michael Niedermayer committed
386
            init_vlc(&huff_vlc[i], 7, 512,
387 388 389
                     tmp_bits, 1, 1, tmp_codes, 2, 2,
                     INIT_VLC_USE_NEW_STATIC);
            offset += huff_vlc_tables_sizes[i];
390
        }
391
        assert(offset == FF_ARRAY_ELEMS(huff_vlc_tables));
392 393

        offset = 0;
394
        for(i=0;i<2;i++) {
395 396
            huff_quad_vlc[i].table = huff_quad_vlc_tables+offset;
            huff_quad_vlc[i].table_allocated = huff_quad_vlc_tables_sizes[i];
397
            init_vlc(&huff_quad_vlc[i], i == 0 ? 7 : 4, 16,
398 399 400
                     mpa_quad_bits[i], 1, 1, mpa_quad_codes[i], 1, 1,
                     INIT_VLC_USE_NEW_STATIC);
            offset += huff_quad_vlc_tables_sizes[i];
401
        }
402
        assert(offset == FF_ARRAY_ELEMS(huff_quad_vlc_tables));
403 404 405 406 407 408 409 410 411 412

        for(i=0;i<9;i++) {
            k = 0;
            for(j=0;j<22;j++) {
                band_index_long[i][j] = k;
                k += band_size_long[i][j];
            }
            band_index_long[i][22] = k;
        }

413
        /* compute n ^ (4/3) and store it in mantissa/exp format */
414

415
        int_pow_init();
416
        for(i=1;i<TABLE_4_3_SIZE;i++) {
417
            double f, fm;
418
            int e, m;
419 420
            f = pow((double)(i/4), 4.0 / 3.0) * pow(2, (i&3)*0.25);
            fm = frexp(f, &e);
421
            m = (uint32_t)(fm*(1LL<<31) + 0.5);
422
            e+= FRAC_BITS - 31 + 5 - 100;
423

424 425
            /* normalized to FRAC_BITS */
            table_4_3_value[i] = m;
426
            table_4_3_exp[i] = -e;
427
        }
428
        for(i=0; i<512*16; i++){
429 430
            int exponent= (i>>4);
            double f= pow(i&15, 4.0 / 3.0) * pow(2, (exponent-400)*0.25 + FRAC_BITS + 5);
431
            expval_table[exponent][i&15]= llrint(f);
432
            if((i&15)==1)
433
                exp_table[exponent]= llrint(f);
434
        }
435

436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461
        for(i=0;i<7;i++) {
            float f;
            int v;
            if (i != 6) {
                f = tan((double)i * M_PI / 12.0);
                v = FIXR(f / (1.0 + f));
            } else {
                v = FIXR(1.0);
            }
            is_table[0][i] = v;
            is_table[1][6 - i] = v;
        }
        /* invalid values */
        for(i=7;i<16;i++)
            is_table[0][i] = is_table[1][i] = 0.0;

        for(i=0;i<16;i++) {
            double f;
            int e, k;

            for(j=0;j<2;j++) {
                e = -(j + 1) * ((i + 1) >> 1);
                f = pow(2.0, e / 4.0);
                k = i & 1;
                is_table_lsf[j][k ^ 1][i] = FIXR(f);
                is_table_lsf[j][k][i] = FIXR(1.0);
Michel Bardiaux's avatar
Michel Bardiaux committed
462
                dprintf(avctx, "is_table_lsf %d %d: %x %x\n",
463 464 465 466 467 468 469 470 471
                        i, j, is_table_lsf[j][0][i], is_table_lsf[j][1][i]);
            }
        }

        for(i=0;i<8;i++) {
            float ci, cs, ca;
            ci = ci_table[i];
            cs = 1.0 / sqrt(1.0 + ci * ci);
            ca = cs * ci;
Michael Niedermayer's avatar
Michael Niedermayer committed
472 473 474
            csa_table[i][0] = FIXHR(cs/4);
            csa_table[i][1] = FIXHR(ca/4);
            csa_table[i][2] = FIXHR(ca/4) + FIXHR(cs/4);
475
            csa_table[i][3] = FIXHR(ca/4) - FIXHR(cs/4);
476 477 478
            csa_table_float[i][0] = cs;
            csa_table_float[i][1] = ca;
            csa_table_float[i][2] = ca + cs;
479
            csa_table_float[i][3] = ca - cs;
480 481 482 483
        }

        /* compute mdct windows */
        for(i=0;i<36;i++) {
Michael Niedermayer's avatar
Michael Niedermayer committed
484 485
            for(j=0; j<4; j++){
                double d;
486

Michael Niedermayer's avatar
Michael Niedermayer committed
487 488
                if(j==2 && i%3 != 1)
                    continue;
489

Michael Niedermayer's avatar
Michael Niedermayer committed
490 491 492 493 494 495 496 497 498 499 500
                d= sin(M_PI * (i + 0.5) / 36.0);
                if(j==1){
                    if     (i>=30) d= 0;
                    else if(i>=24) d= sin(M_PI * (i - 18 + 0.5) / 12.0);
                    else if(i>=18) d= 1;
                }else if(j==3){
                    if     (i<  6) d= 0;
                    else if(i< 12) d= sin(M_PI * (i -  6 + 0.5) / 12.0);
                    else if(i< 18) d= 1;
                }
                //merge last stage of imdct into the window coefficients
Michael Niedermayer's avatar
Michael Niedermayer committed
501 502 503 504 505 506
                d*= 0.5 / cos(M_PI*(2*i + 19)/72);

                if(j==2)
                    mdct_win[j][i/3] = FIXHR((d / (1<<5)));
                else
                    mdct_win[j][i  ] = FIXHR((d / (1<<5)));
Michael Niedermayer's avatar
Michael Niedermayer committed
507
            }
508 509 510 511 512 513 514 515 516 517 518
        }

        /* NOTE: we do frequency inversion adter the MDCT by changing
           the sign of the right window coefs */
        for(j=0;j<4;j++) {
            for(i=0;i<36;i+=2) {
                mdct_win[j + 4][i] = mdct_win[j][i];
                mdct_win[j + 4][i + 1] = -mdct_win[j][i + 1];
            }
        }

Fabrice Bellard's avatar
Fabrice Bellard committed
519 520 521
        init = 1;
    }

Roberto Togni's avatar
Roberto Togni committed
522 523
    if (avctx->codec_id == CODEC_ID_MP3ADU)
        s->adu_mode = 1;
Fabrice Bellard's avatar
Fabrice Bellard committed
524 525 526
    return 0;
}

527
/* tab[i][j] = 1.0 / (2.0 * cos(pi*(2*k+1) / 2^(6 - j))) */
528 529 530

/* cos(i*pi/64) */

531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565
#define COS0_0  FIXHR(0.50060299823519630134/2)
#define COS0_1  FIXHR(0.50547095989754365998/2)
#define COS0_2  FIXHR(0.51544730992262454697/2)
#define COS0_3  FIXHR(0.53104259108978417447/2)
#define COS0_4  FIXHR(0.55310389603444452782/2)
#define COS0_5  FIXHR(0.58293496820613387367/2)
#define COS0_6  FIXHR(0.62250412303566481615/2)
#define COS0_7  FIXHR(0.67480834145500574602/2)
#define COS0_8  FIXHR(0.74453627100229844977/2)
#define COS0_9  FIXHR(0.83934964541552703873/2)
#define COS0_10 FIXHR(0.97256823786196069369/2)
#define COS0_11 FIXHR(1.16943993343288495515/4)
#define COS0_12 FIXHR(1.48416461631416627724/4)
#define COS0_13 FIXHR(2.05778100995341155085/8)
#define COS0_14 FIXHR(3.40760841846871878570/8)
#define COS0_15 FIXHR(10.19000812354805681150/32)

#define COS1_0 FIXHR(0.50241928618815570551/2)
#define COS1_1 FIXHR(0.52249861493968888062/2)
#define COS1_2 FIXHR(0.56694403481635770368/2)
#define COS1_3 FIXHR(0.64682178335999012954/2)
#define COS1_4 FIXHR(0.78815462345125022473/2)
#define COS1_5 FIXHR(1.06067768599034747134/4)
#define COS1_6 FIXHR(1.72244709823833392782/4)
#define COS1_7 FIXHR(5.10114861868916385802/16)

#define COS2_0 FIXHR(0.50979557910415916894/2)
#define COS2_1 FIXHR(0.60134488693504528054/2)
#define COS2_2 FIXHR(0.89997622313641570463/2)
#define COS2_3 FIXHR(2.56291544774150617881/8)

#define COS3_0 FIXHR(0.54119610014619698439/2)
#define COS3_1 FIXHR(1.30656296487637652785/4)

#define COS4_0 FIXHR(0.70710678118654752439/2)
566 567

/* butterfly operator */
568
#define BF(a, b, c, s)\
569 570 571 572
{\
    tmp0 = tab[a] + tab[b];\
    tmp1 = tab[a] - tab[b];\
    tab[a] = tmp0;\
573
    tab[b] = MULH(tmp1<<(s), c);\
574 575 576 577
}

#define BF1(a, b, c, d)\
{\
578 579
    BF(a, b, COS4_0, 1);\
    BF(c, d,-COS4_0, 1);\
580 581 582 583 584
    tab[c] += tab[d];\
}

#define BF2(a, b, c, d)\
{\
585 586
    BF(a, b, COS4_0, 1);\
    BF(c, d,-COS4_0, 1);\
587 588 589 590 591 592 593 594 595
    tab[c] += tab[d];\
    tab[a] += tab[c];\
    tab[c] += tab[b];\
    tab[b] += tab[d];\
}

#define ADD(a, b) tab[a] += tab[b]

/* DCT32 without 1/sqrt(2) coef zero scaling. */
596
static void dct32(int32_t *out, int32_t *tab)
597 598 599 600
{
    int tmp0, tmp1;

    /* pass 1 */
601 602
    BF( 0, 31, COS0_0 , 1);
    BF(15, 16, COS0_15, 5);
603
    /* pass 2 */
604 605
    BF( 0, 15, COS1_0 , 1);
    BF(16, 31,-COS1_0 , 1);
606
    /* pass 1 */
607 608
    BF( 7, 24, COS0_7 , 1);
    BF( 8, 23, COS0_8 , 1);
609
    /* pass 2 */
610 611
    BF( 7,  8, COS1_7 , 4);
    BF(23, 24,-COS1_7 , 4);
612
    /* pass 3 */
613 614 615 616
    BF( 0,  7, COS2_0 , 1);
    BF( 8, 15,-COS2_0 , 1);
    BF(16, 23, COS2_0 , 1);
    BF(24, 31,-COS2_0 , 1);
617
    /* pass 1 */
618 619
    BF( 3, 28, COS0_3 , 1);
    BF(12, 19, COS0_12, 2);
620
    /* pass 2 */
621 622
    BF( 3, 12, COS1_3 , 1);
    BF(19, 28,-COS1_3 , 1);
623
    /* pass 1 */
624 625
    BF( 4, 27, COS0_4 , 1);
    BF(11, 20, COS0_11, 2);
626
    /* pass 2 */
627 628
    BF( 4, 11, COS1_4 , 1);
    BF(20, 27,-COS1_4 , 1);
629
    /* pass 3 */
630 631 632 633
    BF( 3,  4, COS2_3 , 3);
    BF(11, 12,-COS2_3 , 3);
    BF(19, 20, COS2_3 , 3);
    BF(27, 28,-COS2_3 , 3);
634
    /* pass 4 */
635 636 637 638 639 640 641 642
    BF( 0,  3, COS3_0 , 1);
    BF( 4,  7,-COS3_0 , 1);
    BF( 8, 11, COS3_0 , 1);
    BF(12, 15,-COS3_0 , 1);
    BF(16, 19, COS3_0 , 1);
    BF(20, 23,-COS3_0 , 1);
    BF(24, 27, COS3_0 , 1);
    BF(28, 31,-COS3_0 , 1);
643

644 645 646


    /* pass 1 */
647 648
    BF( 1, 30, COS0_1 , 1);
    BF(14, 17, COS0_14, 3);
649
    /* pass 2 */
650 651
    BF( 1, 14, COS1_1 , 1);
    BF(17, 30,-COS1_1 , 1);
652
    /* pass 1 */
653 654
    BF( 6, 25, COS0_6 , 1);
    BF( 9, 22, COS0_9 , 1);
655
    /* pass 2 */
656 657
    BF( 6,  9, COS1_6 , 2);
    BF(22, 25,-COS1_6 , 2);
658
    /* pass 3 */
659 660 661 662
    BF( 1,  6, COS2_1 , 1);
    BF( 9, 14,-COS2_1 , 1);
    BF(17, 22, COS2_1 , 1);
    BF(25, 30,-COS2_1 , 1);
663

664
    /* pass 1 */
665 666
    BF( 2, 29, COS0_2 , 1);
    BF(13, 18, COS0_13, 3);
667
    /* pass 2 */
668 669
    BF( 2, 13, COS1_2 , 1);
    BF(18, 29,-COS1_2 , 1);
670
    /* pass 1 */
671 672
    BF( 5, 26, COS0_5 , 1);
    BF(10, 21, COS0_10, 1);
673
    /* pass 2 */
674 675
    BF( 5, 10, COS1_5 , 2);
    BF(21, 26,-COS1_5 , 2);
676
    /* pass 3 */
677 678 679 680
    BF( 2,  5, COS2_2 , 1);
    BF(10, 13,-COS2_2 , 1);
    BF(18, 21, COS2_2 , 1);
    BF(26, 29,-COS2_2 , 1);
681
    /* pass 4 */
682 683 684 685 686 687 688 689
    BF( 1,  2, COS3_1 , 2);
    BF( 5,  6,-COS3_1 , 2);
    BF( 9, 10, COS3_1 , 2);
    BF(13, 14,-COS3_1 , 2);
    BF(17, 18, COS3_1 , 2);
    BF(21, 22,-COS3_1 , 2);
    BF(25, 26, COS3_1 , 2);
    BF(29, 30,-COS3_1 , 2);
690

691
    /* pass 5 */
692 693 694
    BF1( 0,  1,  2,  3);
    BF2( 4,  5,  6,  7);
    BF1( 8,  9, 10, 11);
695 696 697 698 699
    BF2(12, 13, 14, 15);
    BF1(16, 17, 18, 19);
    BF2(20, 21, 22, 23);
    BF1(24, 25, 26, 27);
    BF2(28, 29, 30, 31);
700

701
    /* pass 6 */
702

703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726
    ADD( 8, 12);
    ADD(12, 10);
    ADD(10, 14);
    ADD(14,  9);
    ADD( 9, 13);
    ADD(13, 11);
    ADD(11, 15);

    out[ 0] = tab[0];
    out[16] = tab[1];
    out[ 8] = tab[2];
    out[24] = tab[3];
    out[ 4] = tab[4];
    out[20] = tab[5];
    out[12] = tab[6];
    out[28] = tab[7];
    out[ 2] = tab[8];
    out[18] = tab[9];
    out[10] = tab[10];
    out[26] = tab[11];
    out[ 6] = tab[12];
    out[22] = tab[13];
    out[14] = tab[14];
    out[30] = tab[15];
727

728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755
    ADD(24, 28);
    ADD(28, 26);
    ADD(26, 30);
    ADD(30, 25);
    ADD(25, 29);
    ADD(29, 27);
    ADD(27, 31);

    out[ 1] = tab[16] + tab[24];
    out[17] = tab[17] + tab[25];
    out[ 9] = tab[18] + tab[26];
    out[25] = tab[19] + tab[27];
    out[ 5] = tab[20] + tab[28];
    out[21] = tab[21] + tab[29];
    out[13] = tab[22] + tab[30];
    out[29] = tab[23] + tab[31];
    out[ 3] = tab[24] + tab[20];
    out[19] = tab[25] + tab[21];
    out[11] = tab[26] + tab[22];
    out[27] = tab[27] + tab[23];
    out[ 7] = tab[28] + tab[18];
    out[23] = tab[29] + tab[19];
    out[15] = tab[30] + tab[17];
    out[31] = tab[31];
}

#if FRAC_BITS <= 15

756
static inline int round_sample(int *sum)
757 758
{
    int sum1;
759 760
    sum1 = (*sum) >> OUT_SHIFT;
    *sum &= (1<<OUT_SHIFT)-1;
761 762 763 764
    if (sum1 < OUT_MIN)
        sum1 = OUT_MIN;
    else if (sum1 > OUT_MAX)
        sum1 = OUT_MAX;
765
    return sum1;
766 767
}

Luca Barbato's avatar
Luca Barbato committed
768 769
/* signed 16x16 -> 32 multiply add accumulate */
#define MACS(rt, ra, rb) MAC16(rt, ra, rb)
Siarhei Siamashka's avatar
Siarhei Siamashka committed
770

Luca Barbato's avatar
Luca Barbato committed
771 772
/* signed 16x16 -> 32 multiply */
#define MULS(ra, rb) MUL16(ra, rb)
773

774 775
#define MLSS(rt, ra, rb) MLS16(rt, ra, rb)

776 777
#else

778
static inline int round_sample(int64_t *sum)
779 780
{
    int sum1;
781 782
    sum1 = (int)((*sum) >> OUT_SHIFT);
    *sum &= (1<<OUT_SHIFT)-1;
783 784 785 786
    if (sum1 < OUT_MIN)
        sum1 = OUT_MIN;
    else if (sum1 > OUT_MAX)
        sum1 = OUT_MAX;
787
    return sum1;
788 789
}

790
#   define MULS(ra, rb) MUL64(ra, rb)
791 792
#   define MACS(rt, ra, rb) MAC64(rt, ra, rb)
#   define MLSS(rt, ra, rb) MLS64(rt, ra, rb)
793 794
#endif

795 796 797 798 799 800 801 802 803 804
#define SUM8(op, sum, w, p)               \
{                                         \
    op(sum, (w)[0 * 64], p[0 * 64]);      \
    op(sum, (w)[1 * 64], p[1 * 64]);      \
    op(sum, (w)[2 * 64], p[2 * 64]);      \
    op(sum, (w)[3 * 64], p[3 * 64]);      \
    op(sum, (w)[4 * 64], p[4 * 64]);      \
    op(sum, (w)[5 * 64], p[5 * 64]);      \
    op(sum, (w)[6 * 64], p[6 * 64]);      \
    op(sum, (w)[7 * 64], p[7 * 64]);      \
805 806 807 808 809 810
}

#define SUM8P2(sum1, op1, sum2, op2, w1, w2, p) \
{                                               \
    int tmp;\
    tmp = p[0 * 64];\
811 812
    op1(sum1, (w1)[0 * 64], tmp);\
    op2(sum2, (w2)[0 * 64], tmp);\
813
    tmp = p[1 * 64];\
814 815
    op1(sum1, (w1)[1 * 64], tmp);\
    op2(sum2, (w2)[1 * 64], tmp);\
816
    tmp = p[2 * 64];\
817 818
    op1(sum1, (w1)[2 * 64], tmp);\
    op2(sum2, (w2)[2 * 64], tmp);\
819
    tmp = p[3 * 64];\
820 821
    op1(sum1, (w1)[3 * 64], tmp);\
    op2(sum2, (w2)[3 * 64], tmp);\
822
    tmp = p[4 * 64];\
823 824
    op1(sum1, (w1)[4 * 64], tmp);\
    op2(sum2, (w2)[4 * 64], tmp);\
825
    tmp = p[5 * 64];\
826 827
    op1(sum1, (w1)[5 * 64], tmp);\
    op2(sum2, (w2)[5 * 64], tmp);\
828
    tmp = p[6 * 64];\
829 830
    op1(sum1, (w1)[6 * 64], tmp);\
    op2(sum2, (w2)[6 * 64], tmp);\
831
    tmp = p[7 * 64];\
832 833
    op1(sum1, (w1)[7 * 64], tmp);\
    op2(sum2, (w2)[7 * 64], tmp);\
834 835
}

836 837 838 839 840 841 842
void ff_mpa_synth_init(MPA_INT *window)
{
    int i;

    /* max = 18760, max sum over all 16 coefs : 44736 */
    for(i=0;i<257;i++) {
        int v;
843
        v = ff_mpa_enwindow[i];
844 845 846 847 848 849 850 851
#if WFRAC_BITS < 16
        v = (v + (1 << (16 - WFRAC_BITS - 1))) >> (16 - WFRAC_BITS);
#endif
        window[i] = v;
        if ((i & 63) != 0)
            v = -v;
        if (i != 0)
            window[512 - i] = v;
852
    }
853
}
854 855 856 857

/* 32 sub band synthesis filter. Input: 32 sub band samples, Output:
   32 samples. */
/* XXX: optimize by avoiding ring buffer usage */
858
void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
859
                         MPA_INT *window, int *dither_state,
860
                         OUT_INT *samples, int incr,
861
                         int32_t sb_samples[SBLIMIT])
862
{
863
    int32_t tmp[32];
864
    register MPA_INT *synth_buf;
Alex Beregszaszi's avatar
Alex Beregszaszi committed
865
    register const MPA_INT *w, *w2, *p;
866
    int j, offset, v;
867
    OUT_INT *samples2;
868
#if FRAC_BITS <= 15
869
    int sum, sum2;
870
#else
871
    int64_t sum, sum2;
872
#endif
873

874
    dct32(tmp, sb_samples);
875

876 877
    offset = *synth_buf_offset;
    synth_buf = synth_buf_ptr + offset;
878 879 880 881

    for(j=0;j<32;j++) {
        v = tmp[j];
#if FRAC_BITS <= 15
882 883
        /* NOTE: can cause a loss in precision if very high amplitude
           sound */
884
        v = av_clip_int16(v);
885 886 887 888 889 890
#endif
        synth_buf[j] = v;
    }
    /* copy to avoid wrap */
    memcpy(synth_buf + 512, synth_buf, 32 * sizeof(MPA_INT));

891
    samples2 = samples + 31 * incr;
892
    w = window;
893 894
    w2 = window + 31;

895
    sum = *dither_state;
896
    p = synth_buf + 16;
897
    SUM8(MACS, sum, w, p);
898
    p = synth_buf + 48;
899
    SUM8(MLSS, sum, w + 32, p);
900
    *samples = round_sample(&sum);
901
    samples += incr;
902 903
    w++;

904 905 906 907 908
    /* we calculate two samples at the same time to avoid one memory
       access per two sample */
    for(j=1;j<16;j++) {
        sum2 = 0;
        p = synth_buf + 16 + j;
909
        SUM8P2(sum, MACS, sum2, MLSS, w, w2, p);
910
        p = synth_buf + 48 - j;
911
        SUM8P2(sum, MLSS, sum2, MLSS, w + 32, w2 + 32, p);
912

913
        *samples = round_sample(&sum);
914
        samples += incr;
915 916
        sum += sum2;
        *samples2 = round_sample(&sum);