mpegaudiodec.c 79.9 KB
Newer Older
Fabrice Bellard's avatar
Fabrice Bellard committed
1 2
/*
 * MPEG Audio decoder
Fabrice Bellard's avatar
Fabrice Bellard committed
3
 * Copyright (c) 2001, 2002 Fabrice Bellard.
Fabrice Bellard's avatar
Fabrice Bellard committed
4
 *
5 6 7
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
Fabrice Bellard's avatar
Fabrice Bellard committed
8 9
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
Fabrice Bellard's avatar
Fabrice Bellard committed
11
 *
12
 * FFmpeg is distributed in the hope that it will be useful,
Fabrice Bellard's avatar
Fabrice Bellard committed
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Fabrice Bellard's avatar
Fabrice Bellard committed
14 15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
Fabrice Bellard's avatar
Fabrice Bellard committed
16
 *
Fabrice Bellard's avatar
Fabrice Bellard committed
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with FFmpeg; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Fabrice Bellard's avatar
Fabrice Bellard committed
20
 */
Michael Niedermayer's avatar
Michael Niedermayer committed
21 22 23 24

/**
 * @file mpegaudiodec.c
 * MPEG Audio decoder.
25
 */
Michael Niedermayer's avatar
Michael Niedermayer committed
26

27
//#define DEBUG
Fabrice Bellard's avatar
Fabrice Bellard committed
28
#include "avcodec.h"
29
#include "bitstream.h"
30
#include "dsputil.h"
Fabrice Bellard's avatar
Fabrice Bellard committed
31 32

/*
33 34 35
 * TODO:
 *  - in low precision mode, use more 16 bit multiplies in synth filter
 *  - test lsf / mpeg25 extensively.
Fabrice Bellard's avatar
Fabrice Bellard committed
36 37
 */

38 39
/* define USE_HIGHPRECISION to have a bit exact (but slower) mpeg
   audio decoder */
40
#ifdef CONFIG_MPEGAUDIO_HP
41
#   define USE_HIGHPRECISION
42
#endif
43

Roberto Togni's avatar
Roberto Togni committed
44
#include "mpegaudio.h"
45
#include "mpegaudiodecheader.h"
46

Luca Barbato's avatar
Luca Barbato committed
47 48
#include "mathops.h"

49 50 51 52
/* WARNING: only correct for posititive numbers */
#define FIXR(a)   ((int)((a) * FRAC_ONE + 0.5))
#define FRAC_RND(a) (((a) + (FRAC_ONE/2)) >> FRAC_BITS)

Michael Niedermayer's avatar
Michael Niedermayer committed
53 54
#define FIXHR(a) ((int)((a) * (1LL<<32) + 0.5))

55 56
/****************/

Fabrice Bellard's avatar
Fabrice Bellard committed
57 58
#define HEADER_SIZE 4

59 60
/* layer 3 "granule" */
typedef struct GranuleDef {
61
    uint8_t scfsi;
62 63 64 65
    int part2_3_length;
    int big_values;
    int global_gain;
    int scalefac_compress;
66 67
    uint8_t block_type;
    uint8_t switch_point;
68 69
    int table_select[3];
    int subblock_gain[3];
70 71
    uint8_t scalefac_scale;
    uint8_t count1table_select;
72 73 74
    int region_size[3]; /* number of huffman codes in each region */
    int preflag;
    int short_start, long_end; /* long/short band indexes */
75 76
    uint8_t scale_factors[40];
    int32_t sb_hybrid[SBLIMIT * 18]; /* 576 samples */
77
} GranuleDef;
Fabrice Bellard's avatar
Fabrice Bellard committed
78

79
#include "mpegaudiodata.h"
80 81
#include "mpegaudiodectab.h"

82 83 84
static void compute_antialias_integer(MPADecodeContext *s, GranuleDef *g);
static void compute_antialias_float(MPADecodeContext *s, GranuleDef *g);

85
/* vlc structure for decoding layer 3 huffman tables */
86
static VLC huff_vlc[16];
87 88
static VLC huff_quad_vlc[2];
/* computed from band_size_long */
89
static uint16_t band_index_long[9][23];
90
/* XXX: free when all decoders are closed */
91
#define TABLE_4_3_SIZE (8191 + 16)*4
92 93
static int8_t  table_4_3_exp[TABLE_4_3_SIZE];
static uint32_t table_4_3_value[TABLE_4_3_SIZE];
94
static uint32_t exp_table[512];
95
static uint32_t expval_table[512][16];
96
/* intensity stereo coef table */
97 98
static int32_t is_table[2][16];
static int32_t is_table_lsf[2][2][16];
99 100
static int32_t csa_table[8][4];
static float csa_table_float[8][4];
101
static int32_t mdct_win[8][36];
102 103

/* lower 2 bits: modulo 3, higher bits: shift */
104
static uint16_t scale_factor_modshift[64];
105
/* [i][j]:  2^(-j/3) * FRAC_ONE * 2^(i+2) / (2^(i+2) - 1) */
106
static int32_t scale_factor_mult[15][3];
107 108 109 110 111
/* mult table for layer 2 group quantization */

#define SCALE_GEN(v) \
{ FIXR(1.0 * (v)), FIXR(0.7937005259 * (v)), FIXR(0.6299605249 * (v)) }

Michael Niedermayer's avatar
Michael Niedermayer committed
112
static const int32_t scale_factor_mult2[3][3] = {
113 114 115
    SCALE_GEN(4.0 / 3.0), /* 3 steps */
    SCALE_GEN(4.0 / 5.0), /* 5 steps */
    SCALE_GEN(4.0 / 9.0), /* 9 steps */
116 117
};

118
static DECLARE_ALIGNED_16(MPA_INT, window[512]);
119

120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181
/**
 * Convert region offsets to region sizes and truncate
 * size to big_values.
 */
void ff_region_offset2size(GranuleDef *g){
    int i, k, j=0;
    g->region_size[2] = (576 / 2);
    for(i=0;i<3;i++) {
        k = FFMIN(g->region_size[i], g->big_values);
        g->region_size[i] = k - j;
        j = k;
    }
}

void ff_init_short_region(MPADecodeContext *s, GranuleDef *g){
    if (g->block_type == 2)
        g->region_size[0] = (36 / 2);
    else {
        if (s->sample_rate_index <= 2)
            g->region_size[0] = (36 / 2);
        else if (s->sample_rate_index != 8)
            g->region_size[0] = (54 / 2);
        else
            g->region_size[0] = (108 / 2);
    }
    g->region_size[1] = (576 / 2);
}

void ff_init_long_region(MPADecodeContext *s, GranuleDef *g, int ra1, int ra2){
    int l;
    g->region_size[0] =
        band_index_long[s->sample_rate_index][ra1 + 1] >> 1;
    /* should not overflow */
    l = FFMIN(ra1 + ra2 + 2, 22);
    g->region_size[1] =
        band_index_long[s->sample_rate_index][l] >> 1;
}

void ff_compute_band_indexes(MPADecodeContext *s, GranuleDef *g){
    if (g->block_type == 2) {
        if (g->switch_point) {
            /* if switched mode, we handle the 36 first samples as
                long blocks.  For 8000Hz, we handle the 48 first
                exponents as long blocks (XXX: check this!) */
            if (s->sample_rate_index <= 2)
                g->long_end = 8;
            else if (s->sample_rate_index != 8)
                g->long_end = 6;
            else
                g->long_end = 4; /* 8000 Hz */

            g->short_start = 2 + (s->sample_rate_index != 8);
        } else {
            g->long_end = 0;
            g->short_start = 0;
        }
    } else {
        g->short_start = 13;
        g->long_end = 22;
    }
}

182 183 184 185 186
/* layer 1 unscaling */
/* n = number of bits of the mantissa minus 1 */
static inline int l1_unscale(int n, int mant, int scale_factor)
{
    int shift, mod;
187
    int64_t val;
188 189 190 191 192 193

    shift = scale_factor_modshift[scale_factor];
    mod = shift & 3;
    shift >>= 2;
    val = MUL64(mant + (-1 << n) + 1, scale_factor_mult[n-1][mod]);
    shift += n;
194 195
    /* NOTE: at this point, 1 <= shift >= 21 + 15 */
    return (int)((val + (1LL << (shift - 1))) >> shift);
196 197 198 199 200 201 202 203 204
}

static inline int l2_unscale_group(int steps, int mant, int scale_factor)
{
    int shift, mod, val;

    shift = scale_factor_modshift[scale_factor];
    mod = shift & 3;
    shift >>= 2;
205 206 207 208 209 210

    val = (mant - (steps >> 1)) * scale_factor_mult2[steps >> 2][mod];
    /* NOTE: at this point, 0 <= shift <= 21 */
    if (shift > 0)
        val = (val + (1 << (shift - 1))) >> shift;
    return val;
211 212 213 214 215 216 217 218
}

/* compute value^(4/3) * 2^(exponent/4). It normalized to FRAC_BITS */
static inline int l3_unscale(int value, int exponent)
{
    unsigned int m;
    int e;

219 220 221 222
    e = table_4_3_exp  [4*value + (exponent&3)];
    m = table_4_3_value[4*value + (exponent&3)];
    e -= (exponent >> 2);
    assert(e>=1);
223
    if (e > 31)
224
        return 0;
225
    m = (m + (1 << (e-1))) >> e;
226

227 228 229
    return m;
}

230 231 232 233 234 235
/* all integer n^(4/3) computation code */
#define DEV_ORDER 13

#define POW_FRAC_BITS 24
#define POW_FRAC_ONE    (1 << POW_FRAC_BITS)
#define POW_FIX(a)   ((int)((a) * POW_FRAC_ONE))
236
#define POW_MULL(a,b) (((int64_t)(a) * (int64_t)(b)) >> POW_FRAC_BITS)
237 238 239

static int dev_4_3_coefs[DEV_ORDER];

240
#if 0 /* unused */
241 242 243 244 245
static int pow_mult3[3] = {
    POW_FIX(1.0),
    POW_FIX(1.25992104989487316476),
    POW_FIX(1.58740105196819947474),
};
246
#endif
247 248 249 250 251 252 253 254 255 256 257 258

static void int_pow_init(void)
{
    int i, a;

    a = POW_FIX(1.0);
    for(i=0;i<DEV_ORDER;i++) {
        a = POW_MULL(a, POW_FIX(4.0 / 3.0) - i * POW_FIX(1.0)) / (i + 1);
        dev_4_3_coefs[i] = a;
    }
}

259
#if 0 /* unused, remove? */
260 261 262 263 264
/* return the mantissa and the binary exponent */
static int int_pow(int i, int *exp_ptr)
{
    int e, er, eq, j;
    int a, a1;
265

266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291
    /* renormalize */
    a = i;
    e = POW_FRAC_BITS;
    while (a < (1 << (POW_FRAC_BITS - 1))) {
        a = a << 1;
        e--;
    }
    a -= (1 << POW_FRAC_BITS);
    a1 = 0;
    for(j = DEV_ORDER - 1; j >= 0; j--)
        a1 = POW_MULL(a, dev_4_3_coefs[j] + a1);
    a = (1 << POW_FRAC_BITS) + a1;
    /* exponent compute (exact) */
    e = e * 4;
    er = e % 3;
    eq = e / 3;
    a = POW_MULL(a, pow_mult3[er]);
    while (a >= 2 * POW_FRAC_ONE) {
        a = a >> 1;
        eq++;
    }
    /* convert to float */
    while (a < POW_FRAC_ONE) {
        a = a << 1;
        eq--;
    }
292
    /* now POW_FRAC_ONE <= a < 2 * POW_FRAC_ONE */
293
#if POW_FRAC_BITS > FRAC_BITS
294 295 296 297 298 299 300
    a = (a + (1 << (POW_FRAC_BITS - FRAC_BITS - 1))) >> (POW_FRAC_BITS - FRAC_BITS);
    /* correct overflow */
    if (a >= 2 * (1 << FRAC_BITS)) {
        a = a >> 1;
        eq++;
    }
#endif
301 302 303
    *exp_ptr = eq;
    return a;
}
304
#endif
Fabrice Bellard's avatar
Fabrice Bellard committed
305 306 307 308

static int decode_init(AVCodecContext * avctx)
{
    MPADecodeContext *s = avctx->priv_data;
309
    static int init=0;
310
    int i, j, k;
Fabrice Bellard's avatar
Fabrice Bellard committed
311

Michel Bardiaux's avatar
Michel Bardiaux committed
312 313
    s->avctx = avctx;

314
#if defined(USE_HIGHPRECISION) && defined(CONFIG_AUDIO_NONSHORT)
315 316 317
    avctx->sample_fmt= SAMPLE_FMT_S32;
#else
    avctx->sample_fmt= SAMPLE_FMT_S16;
318
#endif
319
    s->error_resilience= avctx->error_resilience;
320

Michael Niedermayer's avatar
Michael Niedermayer committed
321
    if(avctx->antialias_algo != FF_AA_FLOAT)
Michael Niedermayer's avatar
10l  
Michael Niedermayer committed
322 323 324 325
        s->compute_antialias= compute_antialias_integer;
    else
        s->compute_antialias= compute_antialias_float;

326
    if (!init && !avctx->parse_only) {
327 328 329 330
        /* scale factors table for layer 1/2 */
        for(i=0;i<64;i++) {
            int shift, mod;
            /* 1.0 (i = 3) is normalized to 2 ^ FRAC_BITS */
331
            shift = (i / 3);
332 333 334 335 336 337 338 339
            mod = i % 3;
            scale_factor_modshift[i] = mod | (shift << 2);
        }

        /* scale factor multiply for layer 1 */
        for(i=0;i<15;i++) {
            int n, norm;
            n = i + 2;
340
            norm = ((INT64_C(1) << n) * FRAC_ONE) / ((1 << n) - 1);
341 342 343
            scale_factor_mult[i][0] = MULL(FIXR(1.0 * 2.0), norm);
            scale_factor_mult[i][1] = MULL(FIXR(0.7937005259 * 2.0), norm);
            scale_factor_mult[i][2] = MULL(FIXR(0.6299605249 * 2.0), norm);
Michel Bardiaux's avatar
Michel Bardiaux committed
344
            dprintf(avctx, "%d: norm=%x s=%x %x %x\n",
345
                    i, norm,
346 347 348 349
                    scale_factor_mult[i][0],
                    scale_factor_mult[i][1],
                    scale_factor_mult[i][2]);
        }
350

351
        ff_mpa_synth_init(window);
352

353 354 355
        /* huffman decode tables */
        for(i=1;i<16;i++) {
            const HuffTable *h = &mpa_huff_tables[i];
356
            int xsize, x, y;
357
            unsigned int n;
Michael Niedermayer's avatar
Michael Niedermayer committed
358 359
            uint8_t  tmp_bits [512];
            uint16_t tmp_codes[512];
Michael Niedermayer's avatar
Michael Niedermayer committed
360 361 362

            memset(tmp_bits , 0, sizeof(tmp_bits ));
            memset(tmp_codes, 0, sizeof(tmp_codes));
363 364 365

            xsize = h->xsize;
            n = xsize * xsize;
366

367 368
            j = 0;
            for(x=0;x<xsize;x++) {
Michael Niedermayer's avatar
Michael Niedermayer committed
369
                for(y=0;y<xsize;y++){
Michael Niedermayer's avatar
Michael Niedermayer committed
370 371
                    tmp_bits [(x << 5) | y | ((x&&y)<<4)]= h->bits [j  ];
                    tmp_codes[(x << 5) | y | ((x&&y)<<4)]= h->codes[j++];
Michael Niedermayer's avatar
Michael Niedermayer committed
372
                }
373
            }
Michael Niedermayer's avatar
Michael Niedermayer committed
374 375

            /* XXX: fail test */
Michael Niedermayer's avatar
Michael Niedermayer committed
376
            init_vlc(&huff_vlc[i], 7, 512,
Michael Niedermayer's avatar
Michael Niedermayer committed
377
                     tmp_bits, 1, 1, tmp_codes, 2, 2, 1);
378 379
        }
        for(i=0;i<2;i++) {
380
            init_vlc(&huff_quad_vlc[i], i == 0 ? 7 : 4, 16,
381
                     mpa_quad_bits[i], 1, 1, mpa_quad_codes[i], 1, 1, 1);
382 383 384 385 386 387 388 389 390 391 392
        }

        for(i=0;i<9;i++) {
            k = 0;
            for(j=0;j<22;j++) {
                band_index_long[i][j] = k;
                k += band_size_long[i][j];
            }
            band_index_long[i][22] = k;
        }

393
        /* compute n ^ (4/3) and store it in mantissa/exp format */
394

395
        int_pow_init();
396
        for(i=1;i<TABLE_4_3_SIZE;i++) {
397
            double f, fm;
398
            int e, m;
399 400
            f = pow((double)(i/4), 4.0 / 3.0) * pow(2, (i&3)*0.25);
            fm = frexp(f, &e);
401
            m = (uint32_t)(fm*(1LL<<31) + 0.5);
402
            e+= FRAC_BITS - 31 + 5 - 100;
403

404 405
            /* normalized to FRAC_BITS */
            table_4_3_value[i] = m;
406 407
//            av_log(NULL, AV_LOG_DEBUG, "%d %d %f\n", i, m, pow((double)i, 4.0 / 3.0));
            table_4_3_exp[i] = -e;
408
        }
409
        for(i=0; i<512*16; i++){
410 411
            int exponent= (i>>4);
            double f= pow(i&15, 4.0 / 3.0) * pow(2, (exponent-400)*0.25 + FRAC_BITS + 5);
412
            expval_table[exponent][i&15]= llrint(f);
413
            if((i&15)==1)
414
                exp_table[exponent]= llrint(f);
415
        }
416

417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442
        for(i=0;i<7;i++) {
            float f;
            int v;
            if (i != 6) {
                f = tan((double)i * M_PI / 12.0);
                v = FIXR(f / (1.0 + f));
            } else {
                v = FIXR(1.0);
            }
            is_table[0][i] = v;
            is_table[1][6 - i] = v;
        }
        /* invalid values */
        for(i=7;i<16;i++)
            is_table[0][i] = is_table[1][i] = 0.0;

        for(i=0;i<16;i++) {
            double f;
            int e, k;

            for(j=0;j<2;j++) {
                e = -(j + 1) * ((i + 1) >> 1);
                f = pow(2.0, e / 4.0);
                k = i & 1;
                is_table_lsf[j][k ^ 1][i] = FIXR(f);
                is_table_lsf[j][k][i] = FIXR(1.0);
Michel Bardiaux's avatar
Michel Bardiaux committed
443
                dprintf(avctx, "is_table_lsf %d %d: %x %x\n",
444 445 446 447 448 449 450 451 452
                        i, j, is_table_lsf[j][0][i], is_table_lsf[j][1][i]);
            }
        }

        for(i=0;i<8;i++) {
            float ci, cs, ca;
            ci = ci_table[i];
            cs = 1.0 / sqrt(1.0 + ci * ci);
            ca = cs * ci;
Michael Niedermayer's avatar
Michael Niedermayer committed
453 454 455
            csa_table[i][0] = FIXHR(cs/4);
            csa_table[i][1] = FIXHR(ca/4);
            csa_table[i][2] = FIXHR(ca/4) + FIXHR(cs/4);
456
            csa_table[i][3] = FIXHR(ca/4) - FIXHR(cs/4);
457 458 459
            csa_table_float[i][0] = cs;
            csa_table_float[i][1] = ca;
            csa_table_float[i][2] = ca + cs;
460
            csa_table_float[i][3] = ca - cs;
461
//            printf("%d %d %d %d\n", FIX(cs), FIX(cs-1), FIX(ca), FIX(cs)-FIX(ca));
Michael Niedermayer's avatar
Michael Niedermayer committed
462
//            av_log(NULL, AV_LOG_DEBUG,"%f %f %f %f\n", cs, ca, ca+cs, ca-cs);
463 464 465 466
        }

        /* compute mdct windows */
        for(i=0;i<36;i++) {
Michael Niedermayer's avatar
Michael Niedermayer committed
467 468
            for(j=0; j<4; j++){
                double d;
469

Michael Niedermayer's avatar
Michael Niedermayer committed
470 471
                if(j==2 && i%3 != 1)
                    continue;
472

Michael Niedermayer's avatar
Michael Niedermayer committed
473 474 475 476 477 478 479 480 481 482 483
                d= sin(M_PI * (i + 0.5) / 36.0);
                if(j==1){
                    if     (i>=30) d= 0;
                    else if(i>=24) d= sin(M_PI * (i - 18 + 0.5) / 12.0);
                    else if(i>=18) d= 1;
                }else if(j==3){
                    if     (i<  6) d= 0;
                    else if(i< 12) d= sin(M_PI * (i -  6 + 0.5) / 12.0);
                    else if(i< 18) d= 1;
                }
                //merge last stage of imdct into the window coefficients
Michael Niedermayer's avatar
Michael Niedermayer committed
484 485 486 487 488 489
                d*= 0.5 / cos(M_PI*(2*i + 19)/72);

                if(j==2)
                    mdct_win[j][i/3] = FIXHR((d / (1<<5)));
                else
                    mdct_win[j][i  ] = FIXHR((d / (1<<5)));
Michael Niedermayer's avatar
Michael Niedermayer committed
490 491
//                av_log(NULL, AV_LOG_DEBUG, "%2d %d %f\n", i,j,d / (1<<5));
            }
492 493 494 495 496 497 498 499 500 501 502 503 504
        }

        /* NOTE: we do frequency inversion adter the MDCT by changing
           the sign of the right window coefs */
        for(j=0;j<4;j++) {
            for(i=0;i<36;i+=2) {
                mdct_win[j + 4][i] = mdct_win[j][i];
                mdct_win[j + 4][i + 1] = -mdct_win[j][i + 1];
            }
        }

#if defined(DEBUG)
        for(j=0;j<8;j++) {
505
            av_log(avctx, AV_LOG_DEBUG, "win%d=\n", j);
506
            for(i=0;i<36;i++)
507 508
                av_log(avctx, AV_LOG_DEBUG, "%f, ", (double)mdct_win[j][i] / FRAC_ONE);
            av_log(avctx, AV_LOG_DEBUG, "\n");
509 510
        }
#endif
Fabrice Bellard's avatar
Fabrice Bellard committed
511 512 513
        init = 1;
    }

514 515 516
#ifdef DEBUG
    s->frame_count = 0;
#endif
Roberto Togni's avatar
Roberto Togni committed
517 518
    if (avctx->codec_id == CODEC_ID_MP3ADU)
        s->adu_mode = 1;
Fabrice Bellard's avatar
Fabrice Bellard committed
519 520 521
    return 0;
}

522
/* tab[i][j] = 1.0 / (2.0 * cos(pi*(2*k+1) / 2^(6 - j))) */
523 524 525

/* cos(i*pi/64) */

526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560
#define COS0_0  FIXHR(0.50060299823519630134/2)
#define COS0_1  FIXHR(0.50547095989754365998/2)
#define COS0_2  FIXHR(0.51544730992262454697/2)
#define COS0_3  FIXHR(0.53104259108978417447/2)
#define COS0_4  FIXHR(0.55310389603444452782/2)
#define COS0_5  FIXHR(0.58293496820613387367/2)
#define COS0_6  FIXHR(0.62250412303566481615/2)
#define COS0_7  FIXHR(0.67480834145500574602/2)
#define COS0_8  FIXHR(0.74453627100229844977/2)
#define COS0_9  FIXHR(0.83934964541552703873/2)
#define COS0_10 FIXHR(0.97256823786196069369/2)
#define COS0_11 FIXHR(1.16943993343288495515/4)
#define COS0_12 FIXHR(1.48416461631416627724/4)
#define COS0_13 FIXHR(2.05778100995341155085/8)
#define COS0_14 FIXHR(3.40760841846871878570/8)
#define COS0_15 FIXHR(10.19000812354805681150/32)

#define COS1_0 FIXHR(0.50241928618815570551/2)
#define COS1_1 FIXHR(0.52249861493968888062/2)
#define COS1_2 FIXHR(0.56694403481635770368/2)
#define COS1_3 FIXHR(0.64682178335999012954/2)
#define COS1_4 FIXHR(0.78815462345125022473/2)
#define COS1_5 FIXHR(1.06067768599034747134/4)
#define COS1_6 FIXHR(1.72244709823833392782/4)
#define COS1_7 FIXHR(5.10114861868916385802/16)

#define COS2_0 FIXHR(0.50979557910415916894/2)
#define COS2_1 FIXHR(0.60134488693504528054/2)
#define COS2_2 FIXHR(0.89997622313641570463/2)
#define COS2_3 FIXHR(2.56291544774150617881/8)

#define COS3_0 FIXHR(0.54119610014619698439/2)
#define COS3_1 FIXHR(1.30656296487637652785/4)

#define COS4_0 FIXHR(0.70710678118654752439/2)
561 562

/* butterfly operator */
563
#define BF(a, b, c, s)\
564 565 566 567
{\
    tmp0 = tab[a] + tab[b];\
    tmp1 = tab[a] - tab[b];\
    tab[a] = tmp0;\
568
    tab[b] = MULH(tmp1<<(s), c);\
569 570 571 572
}

#define BF1(a, b, c, d)\
{\
573 574
    BF(a, b, COS4_0, 1);\
    BF(c, d,-COS4_0, 1);\
575 576 577 578 579
    tab[c] += tab[d];\
}

#define BF2(a, b, c, d)\
{\
580 581
    BF(a, b, COS4_0, 1);\
    BF(c, d,-COS4_0, 1);\
582 583 584 585 586 587 588 589 590
    tab[c] += tab[d];\
    tab[a] += tab[c];\
    tab[c] += tab[b];\
    tab[b] += tab[d];\
}

#define ADD(a, b) tab[a] += tab[b]

/* DCT32 without 1/sqrt(2) coef zero scaling. */
591
static void dct32(int32_t *out, int32_t *tab)
592 593 594 595
{
    int tmp0, tmp1;

    /* pass 1 */
596 597
    BF( 0, 31, COS0_0 , 1);
    BF(15, 16, COS0_15, 5);
598
    /* pass 2 */
599 600
    BF( 0, 15, COS1_0 , 1);
    BF(16, 31,-COS1_0 , 1);
601
    /* pass 1 */
602 603
    BF( 7, 24, COS0_7 , 1);
    BF( 8, 23, COS0_8 , 1);
604
    /* pass 2 */
605 606
    BF( 7,  8, COS1_7 , 4);
    BF(23, 24,-COS1_7 , 4);
607
    /* pass 3 */
608 609 610 611
    BF( 0,  7, COS2_0 , 1);
    BF( 8, 15,-COS2_0 , 1);
    BF(16, 23, COS2_0 , 1);
    BF(24, 31,-COS2_0 , 1);
612
    /* pass 1 */
613 614
    BF( 3, 28, COS0_3 , 1);
    BF(12, 19, COS0_12, 2);
615
    /* pass 2 */
616 617
    BF( 3, 12, COS1_3 , 1);
    BF(19, 28,-COS1_3 , 1);
618
    /* pass 1 */
619 620
    BF( 4, 27, COS0_4 , 1);
    BF(11, 20, COS0_11, 2);
621
    /* pass 2 */
622 623
    BF( 4, 11, COS1_4 , 1);
    BF(20, 27,-COS1_4 , 1);
624
    /* pass 3 */
625 626 627 628
    BF( 3,  4, COS2_3 , 3);
    BF(11, 12,-COS2_3 , 3);
    BF(19, 20, COS2_3 , 3);
    BF(27, 28,-COS2_3 , 3);
629
    /* pass 4 */
630 631 632 633 634 635 636 637
    BF( 0,  3, COS3_0 , 1);
    BF( 4,  7,-COS3_0 , 1);
    BF( 8, 11, COS3_0 , 1);
    BF(12, 15,-COS3_0 , 1);
    BF(16, 19, COS3_0 , 1);
    BF(20, 23,-COS3_0 , 1);
    BF(24, 27, COS3_0 , 1);
    BF(28, 31,-COS3_0 , 1);
638

639 640 641


    /* pass 1 */
642 643
    BF( 1, 30, COS0_1 , 1);
    BF(14, 17, COS0_14, 3);
644
    /* pass 2 */
645 646
    BF( 1, 14, COS1_1 , 1);
    BF(17, 30,-COS1_1 , 1);
647
    /* pass 1 */
648 649
    BF( 6, 25, COS0_6 , 1);
    BF( 9, 22, COS0_9 , 1);
650
    /* pass 2 */
651 652
    BF( 6,  9, COS1_6 , 2);
    BF(22, 25,-COS1_6 , 2);
653
    /* pass 3 */
654 655 656 657
    BF( 1,  6, COS2_1 , 1);
    BF( 9, 14,-COS2_1 , 1);
    BF(17, 22, COS2_1 , 1);
    BF(25, 30,-COS2_1 , 1);
658

659
    /* pass 1 */
660 661
    BF( 2, 29, COS0_2 , 1);
    BF(13, 18, COS0_13, 3);
662
    /* pass 2 */
663 664
    BF( 2, 13, COS1_2 , 1);
    BF(18, 29,-COS1_2 , 1);
665
    /* pass 1 */
666 667
    BF( 5, 26, COS0_5 , 1);
    BF(10, 21, COS0_10, 1);
668
    /* pass 2 */
669 670
    BF( 5, 10, COS1_5 , 2);
    BF(21, 26,-COS1_5 , 2);
671
    /* pass 3 */
672 673 674 675
    BF( 2,  5, COS2_2 , 1);
    BF(10, 13,-COS2_2 , 1);
    BF(18, 21, COS2_2 , 1);
    BF(26, 29,-COS2_2 , 1);
676
    /* pass 4 */
677 678 679 680 681 682 683 684
    BF( 1,  2, COS3_1 , 2);
    BF( 5,  6,-COS3_1 , 2);
    BF( 9, 10, COS3_1 , 2);
    BF(13, 14,-COS3_1 , 2);
    BF(17, 18, COS3_1 , 2);
    BF(21, 22,-COS3_1 , 2);
    BF(25, 26, COS3_1 , 2);
    BF(29, 30,-COS3_1 , 2);
685

686
    /* pass 5 */
687 688 689
    BF1( 0,  1,  2,  3);
    BF2( 4,  5,  6,  7);
    BF1( 8,  9, 10, 11);
690 691 692 693 694
    BF2(12, 13, 14, 15);
    BF1(16, 17, 18, 19);
    BF2(20, 21, 22, 23);
    BF1(24, 25, 26, 27);
    BF2(28, 29, 30, 31);
695

696
    /* pass 6 */
697

698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721
    ADD( 8, 12);
    ADD(12, 10);
    ADD(10, 14);
    ADD(14,  9);
    ADD( 9, 13);
    ADD(13, 11);
    ADD(11, 15);

    out[ 0] = tab[0];
    out[16] = tab[1];
    out[ 8] = tab[2];
    out[24] = tab[3];
    out[ 4] = tab[4];
    out[20] = tab[5];
    out[12] = tab[6];
    out[28] = tab[7];
    out[ 2] = tab[8];
    out[18] = tab[9];
    out[10] = tab[10];
    out[26] = tab[11];
    out[ 6] = tab[12];
    out[22] = tab[13];
    out[14] = tab[14];
    out[30] = tab[15];
722

723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750
    ADD(24, 28);
    ADD(28, 26);
    ADD(26, 30);
    ADD(30, 25);
    ADD(25, 29);
    ADD(29, 27);
    ADD(27, 31);

    out[ 1] = tab[16] + tab[24];
    out[17] = tab[17] + tab[25];
    out[ 9] = tab[18] + tab[26];
    out[25] = tab[19] + tab[27];
    out[ 5] = tab[20] + tab[28];
    out[21] = tab[21] + tab[29];
    out[13] = tab[22] + tab[30];
    out[29] = tab[23] + tab[31];
    out[ 3] = tab[24] + tab[20];
    out[19] = tab[25] + tab[21];
    out[11] = tab[26] + tab[22];
    out[27] = tab[27] + tab[23];
    out[ 7] = tab[28] + tab[18];
    out[23] = tab[29] + tab[19];
    out[15] = tab[30] + tab[17];
    out[31] = tab[31];
}

#if FRAC_BITS <= 15

751
static inline int round_sample(int *sum)
752 753
{
    int sum1;
754 755
    sum1 = (*sum) >> OUT_SHIFT;
    *sum &= (1<<OUT_SHIFT)-1;
756 757 758 759
    if (sum1 < OUT_MIN)
        sum1 = OUT_MIN;
    else if (sum1 > OUT_MAX)
        sum1 = OUT_MAX;
760
    return sum1;
761 762
}

Luca Barbato's avatar
Luca Barbato committed
763 764
/* signed 16x16 -> 32 multiply add accumulate */
#define MACS(rt, ra, rb) MAC16(rt, ra, rb)
Siarhei Siamashka's avatar
Siarhei Siamashka committed
765

Luca Barbato's avatar
Luca Barbato committed
766 767
/* signed 16x16 -> 32 multiply */
#define MULS(ra, rb) MUL16(ra, rb)
768

769 770
#else

771
static inline int round_sample(int64_t *sum)
772 773
{
    int sum1;
774 775
    sum1 = (int)((*sum) >> OUT_SHIFT);
    *sum &= (1<<OUT_SHIFT)-1;
776 777 778 779
    if (sum1 < OUT_MIN)
        sum1 = OUT_MIN;
    else if (sum1 > OUT_MAX)
        sum1 = OUT_MAX;
780
    return sum1;
781 782
}

783
#   define MULS(ra, rb) MUL64(ra, rb)
784 785 786
#endif

#define SUM8(sum, op, w, p) \
787
{                                               \
788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824
    sum op MULS((w)[0 * 64], p[0 * 64]);\
    sum op MULS((w)[1 * 64], p[1 * 64]);\
    sum op MULS((w)[2 * 64], p[2 * 64]);\
    sum op MULS((w)[3 * 64], p[3 * 64]);\
    sum op MULS((w)[4 * 64], p[4 * 64]);\
    sum op MULS((w)[5 * 64], p[5 * 64]);\
    sum op MULS((w)[6 * 64], p[6 * 64]);\
    sum op MULS((w)[7 * 64], p[7 * 64]);\
}

#define SUM8P2(sum1, op1, sum2, op2, w1, w2, p) \
{                                               \
    int tmp;\
    tmp = p[0 * 64];\
    sum1 op1 MULS((w1)[0 * 64], tmp);\
    sum2 op2 MULS((w2)[0 * 64], tmp);\
    tmp = p[1 * 64];\
    sum1 op1 MULS((w1)[1 * 64], tmp);\
    sum2 op2 MULS((w2)[1 * 64], tmp);\
    tmp = p[2 * 64];\
    sum1 op1 MULS((w1)[2 * 64], tmp);\
    sum2 op2 MULS((w2)[2 * 64], tmp);\
    tmp = p[3 * 64];\
    sum1 op1 MULS((w1)[3 * 64], tmp);\
    sum2 op2 MULS((w2)[3 * 64], tmp);\
    tmp = p[4 * 64];\
    sum1 op1 MULS((w1)[4 * 64], tmp);\
    sum2 op2 MULS((w2)[4 * 64], tmp);\
    tmp = p[5 * 64];\
    sum1 op1 MULS((w1)[5 * 64], tmp);\
    sum2 op2 MULS((w2)[5 * 64], tmp);\
    tmp = p[6 * 64];\
    sum1 op1 MULS((w1)[6 * 64], tmp);\
    sum2 op2 MULS((w2)[6 * 64], tmp);\
    tmp = p[7 * 64];\
    sum1 op1 MULS((w1)[7 * 64], tmp);\
    sum2 op2 MULS((w2)[7 * 64], tmp);\
825 826
}

827 828 829 830 831 832 833
void ff_mpa_synth_init(MPA_INT *window)
{
    int i;

    /* max = 18760, max sum over all 16 coefs : 44736 */
    for(i=0;i<257;i++) {
        int v;
834
        v = ff_mpa_enwindow[i];
835 836 837 838 839 840 841 842
#if WFRAC_BITS < 16
        v = (v + (1 << (16 - WFRAC_BITS - 1))) >> (16 - WFRAC_BITS);
#endif
        window[i] = v;
        if ((i & 63) != 0)
            v = -v;
        if (i != 0)
            window[512 - i] = v;
843
    }
844
}
845 846 847 848

/* 32 sub band synthesis filter. Input: 32 sub band samples, Output:
   32 samples. */
/* XXX: optimize by avoiding ring buffer usage */
849
void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
850
                         MPA_INT *window, int *dither_state,
851
                         OUT_INT *samples, int incr,
852
                         int32_t sb_samples[SBLIMIT])
853
{
854
    int32_t tmp[32];
855
    register MPA_INT *synth_buf;
Alex Beregszaszi's avatar
Alex Beregszaszi committed
856
    register const MPA_INT *w, *w2, *p;
857
    int j, offset, v;
858
    OUT_INT *samples2;
859
#if FRAC_BITS <= 15
860
    int sum, sum2;
861
#else
862
    int64_t sum, sum2;
863
#endif
864

865
    dct32(tmp, sb_samples);
866

867 868
    offset = *synth_buf_offset;
    synth_buf = synth_buf_ptr + offset;
869 870 871 872

    for(j=0;j<32;j++) {
        v = tmp[j];
#if FRAC_BITS <= 15
873 874
        /* NOTE: can cause a loss in precision if very high amplitude
           sound */
875
        v = av_clip_int16(v);
876 877 878 879 880 881
#endif
        synth_buf[j] = v;
    }
    /* copy to avoid wrap */
    memcpy(synth_buf + 512, synth_buf, 32 * sizeof(MPA_INT));

882
    samples2 = samples + 31 * incr;
883
    w = window;
884 885
    w2 = window + 31;

886
    sum = *dither_state;
887 888 889 890
    p = synth_buf + 16;
    SUM8(sum, +=, w, p);
    p = synth_buf + 48;
    SUM8(sum, -=, w + 32, p);
891
    *samples = round_sample(&sum);
892
    samples += incr;
893 894
    w++;

895 896 897 898 899 900 901 902 903
    /* we calculate two samples at the same time to avoid one memory
       access per two sample */
    for(j=1;j<16;j++) {
        sum2 = 0;
        p = synth_buf + 16 + j;
        SUM8P2(sum, +=, sum2, -=, w, w2, p);
        p = synth_buf + 48 - j;
        SUM8P2(sum, -=, sum2, -=, w + 32, w2 + 32, p);

904
        *samples = round_sample(&sum);
905
        samples += incr;
906 907
        sum += sum2;
        *samples2 = round_sample(&sum);
908
        samples2 -= incr;
909
        w++;
910
        w2--;
911
    }
912

913 914
    p = synth_buf + 32;
    SUM8(sum, -=, w + 32, p);
915
    *samples = round_sample(&sum);
916
    *dither_state= sum;
917

918
    offset = (offset - 32) & 511;
919
    *synth_buf_offset = offset;
920 921
}

Michael Niedermayer's avatar
Michael Niedermayer committed
922 923 924 925 926 927