/*
 * VP8 compatible video decoder
 *
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 * Copyright (C) 2010 Jason Garrett-Glaser
 * Copyright (C) 2012 Daniel Kang
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

26
#include "libavutil/imgutils.h"
David Conrad's avatar
David Conrad committed
27
#include "avcodec.h"
28
#include "internal.h"
29
#include "vp8.h"
David Conrad's avatar
David Conrad committed
30 31
#include "vp8data.h"
#include "rectangle.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
32
#include "thread.h"
David Conrad's avatar
David Conrad committed
33

34 35 36 37
#if ARCH_ARM
#   include "arm/vp8.h"
#endif

38 39
/**
 * Release every buffer that update_dimensions() attaches to the context.
 *
 * When the per-thread array exists, each of its MAX_THREADS entries has its
 * condition variable and mutex destroyed (threaded builds only) and its
 * filter_strength / edge_emu_buffer allocations freed before the array
 * itself is released. s->macroblocks is reset to NULL because it points
 * into macroblocks_base, which is freed here.
 */
static void free_buffers(VP8Context *s)
{
    if (s->thread_data) {
        int t = 0;

        while (t < MAX_THREADS) {
#if HAVE_THREADS
            pthread_cond_destroy(&s->thread_data[t].cond);
            pthread_mutex_destroy(&s->thread_data[t].lock);
#endif
            av_freep(&s->thread_data[t].filter_strength);
            av_freep(&s->thread_data[t].edge_emu_buffer);
            t++;
        }
    }

    av_freep(&s->thread_data);
    av_freep(&s->macroblocks_base);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);

    /* Derived pointer into macroblocks_base; must not outlive it. */
    s->macroblocks = NULL;
}

59
/**
 * Obtain a picture buffer for a frame and allocate its segmentation map.
 *
 * @param s   decoder context
 * @param f   frame to fill in
 * @param ref non-zero to request the buffer with AV_GET_BUFFER_FLAG_REF
 * @return 0 on success, the error from ff_thread_get_buffer() if that
 *         fails, or AVERROR(ENOMEM) if the segmentation map (one zeroed
 *         byte per macroblock) cannot be allocated; in the latter case the
 *         picture buffer is released again
 */
static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
{
    const int flags = ref ? AV_GET_BUFFER_FLAG_REF : 0;
    int err         = ff_thread_get_buffer(s->avctx, &f->tf, flags);

    if (err < 0)
        return err;

    f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height);
    if (f->seg_map)
        return 0;

    ff_thread_release_buffer(s->avctx, &f->tf);
    return AVERROR(ENOMEM);
}

72
/**
 * Drop a frame's segmentation-map reference and release its picture buffer.
 */
static void vp8_release_frame(VP8Context *s, VP8Frame *f)
{
    /* seg_map first, then the thread-frame buffer it accompanies */
    av_buffer_unref(&f->seg_map);

    ff_thread_release_buffer(s->avctx, &f->tf);
}

/**
 * Make dst a new reference to src.
 *
 * Whatever dst held before is released first. The segmentation map is
 * referenced only when src has one.
 *
 * @return 0 on success, the error from ff_thread_ref_frame() if that fails,
 *         or AVERROR(ENOMEM) if the segmentation map cannot be referenced
 *         (dst is released again in that case)
 */
static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
{
    int err;

    vp8_release_frame(s, dst);

    err = ff_thread_ref_frame(&dst->tf, &src->tf);
    if (err < 0)
        return err;

    if (!src->seg_map)
        return 0;

    dst->seg_map = av_buffer_ref(src->seg_map);
    if (!dst->seg_map) {
        vp8_release_frame(s, dst);
        return AVERROR(ENOMEM);
    }

    return 0;
}

95 96

/**
 * Release all frames held by the decoder and clear the frame pointer table.
 *
 * @param avctx    codec context whose priv_data is the VP8Context
 * @param free_mem when non-zero, additionally free the per-context buffers
 *                 via free_buffers()
 */
static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
{
    VP8Context *s = avctx->priv_data;
    int idx = 0;

    while (idx < FF_ARRAY_ELEMS(s->frames)) {
        vp8_release_frame(s, &s->frames[idx]);
        idx++;
    }
    memset(s->framep, 0, sizeof(s->framep));

    if (!free_mem)
        return;

    free_buffers(s);
}

/**
 * Flush the decoder: release all frames but keep the allocated buffers.
 */
static void vp8_decode_flush(AVCodecContext *avctx)
{
    const int keep_buffers = 0; /* free_mem = 0 */

    vp8_decode_flush_impl(avctx, keep_buffers);
}

/**
 * Recompute the macroblock geometry and allocate the per-context buffers.
 *
 * If the requested size differs from the codec context's current size, the
 * new size is validated, all frames and buffers are released and the codec
 * context dimensions are updated before anything is allocated.
 *
 * The macroblock array layout depends on s->mb_layout, which is set when
 * slice threading is active and more than one of
 * min(num_coeff_partitions, thread_count) workers is available.
 *
 * On any allocation failure every buffer allocated here is released again,
 * so the context is left without buffers (s->macroblocks_base == NULL).
 *
 * @param s      decoder context
 * @param width  new frame width
 * @param height new frame height
 * @return 0 on success, AVERROR_INVALIDDATA if av_image_check_size()
 *         rejects the size, AVERROR(ENOMEM) if an allocation fails
 */
static int update_dimensions(VP8Context *s, int width, int height)
{
    AVCodecContext *avctx = s->avctx;
    int i;
    int alloc_failed = 0;

    if (width  != s->avctx->width ||
        height != s->avctx->height) {
        if (av_image_check_size(width, height, 0, s->avctx))
            return AVERROR_INVALIDDATA;

        vp8_decode_flush_impl(s->avctx, 1);

        avcodec_set_dimensions(s->avctx, width, height);
    }

    s->mb_width  = (s->avctx->coded_width +15) / 16;
    s->mb_height = (s->avctx->coded_height+15) / 16;

    s->mb_layout = (avctx->active_thread_type == FF_THREAD_SLICE) && (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1);
    if (!s->mb_layout) { // Frame threading and one thread
        s->macroblocks_base       = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
    }
    else // Sliced threading
        s->macroblocks_base       = av_mallocz((s->mb_width+2)*(s->mb_height+2)*sizeof(*s->macroblocks));
    s->top_nnz                    = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
    s->top_border                 = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
    s->thread_data                = av_mallocz(MAX_THREADS*sizeof(VP8ThreadData));

    if (!s->thread_data) {
        /* The loop below dereferences thread_data, so bail out first.
         * With thread_data NULL, free_buffers() touches no mutex/cond and
         * only releases the plain allocations made above. */
        free_buffers(s);
        return AVERROR(ENOMEM);
    }

    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].filter_strength = av_mallocz(s->mb_width*sizeof(*s->thread_data[0].filter_strength));
        if (!s->thread_data[i].filter_strength)
            alloc_failed = 1;
        /* Always initialise the primitives, even after a failed allocation,
         * so that free_buffers() can destroy all MAX_THREADS of them. */
#if HAVE_THREADS
        pthread_mutex_init(&s->thread_data[i].lock, NULL);
        pthread_cond_init(&s->thread_data[i].cond, NULL);
#endif
    }

    if (alloc_failed ||
        !s->macroblocks_base || !s->top_nnz || !s->top_border ||
        (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
        /* Do not leave a half-allocated context behind: a later call would
         * overwrite (and leak) the surviving pointers. */
        free_buffers(s);
        return AVERROR(ENOMEM);
    }

    /* Entry 0 of macroblocks_base is padding in front of the array. */
    s->macroblocks        = s->macroblocks_base + 1;

    return 0;
}

/**
 * Read the segmentation section of the frame header from s->c.
 *
 * Reads, in bitstream order: the update_map flag, a flag telling whether
 * segment feature data follows (if so: absolute_vals, four signed 7-bit
 * base quantizers and four signed 6-bit filter levels), and - only when
 * update_map is set - three segment-id probabilities, each either coded
 * as 8 bits or defaulting to 255.
 */
static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int seg, node;

    s->segmentation.update_map = vp8_rac_get(c);

    if (vp8_rac_get(c)) { // update segment feature data
        s->segmentation.absolute_vals = vp8_rac_get(c);

        for (seg = 0; seg < 4; seg++)
            s->segmentation.base_quant[seg]   = vp8_rac_get_sint(c, 7);

        for (seg = 0; seg < 4; seg++)
            s->segmentation.filter_level[seg] = vp8_rac_get_sint(c, 6);
    }

    if (!s->segmentation.update_map)
        return;

    for (node = 0; node < 3; node++) {
        if (vp8_rac_get(c))
            s->prob->segmentid[node] = vp8_rac_get_uint(c, 8);
        else
            s->prob->segmentid[node] = 255;
    }
}

/**
 * Read loop-filter delta updates from s->c.
 *
 * For each of the four reference-frame deltas and then for each mode delta
 * (MODE_I4x4 through VP8_MVMODE_SPLIT) an update flag is read; when it is
 * set, a 6-bit magnitude followed by a sign bit replaces the stored value.
 * Entries whose flag is clear keep their previous value.
 */
static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int idx;

    for (idx = 0; idx < 4; idx++) {
        int magnitude;

        if (!vp8_rac_get(c))
            continue;

        magnitude = vp8_rac_get_uint(c, 6);
        s->lf_delta.ref[idx] = vp8_rac_get(c) ? -magnitude : magnitude;
    }

    for (idx = MODE_I4x4; idx <= VP8_MVMODE_SPLIT; idx++) {
        int magnitude;

        if (!vp8_rac_get(c))
            continue;

        magnitude = vp8_rac_get_uint(c, 6);
        s->lf_delta.mode[idx] = vp8_rac_get(c) ? -magnitude : magnitude;
    }
}

/**
 * Set up one range decoder per DCT coefficient partition.
 *
 * The partition count is 1 << (2 bits read from s->c). buf starts with a
 * table of 3-byte little-endian sizes for all partitions except the last;
 * the partition data follows the table, and the last partition takes
 * whatever bytes remain.
 *
 * @param s        decoder context
 * @param buf      start of the partition size table
 * @param buf_size number of bytes available at buf
 * @return 0 on success, -1 if the table or any partition exceeds buf_size
 */
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *size_table = buf;
    int last, table_bytes, part;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);

    last        = s->num_coeff_partitions - 1;
    table_bytes = 3 * last;

    buf      += table_bytes;
    buf_size -= table_bytes;
    if (buf_size < 0)
        return -1;

    for (part = 0; part < last; part++) {
        int part_size = AV_RL24(size_table + 3 * part);

        if (part_size > buf_size)
            return -1;

        ff_vp56_init_range_decoder(&s->coeff_partition[part], buf, part_size);
        buf      += part_size;
        buf_size -= part_size;
    }

    /* The final partition has no size entry: it gets the remaining bytes. */
    ff_vp56_init_range_decoder(&s->coeff_partition[part], buf, buf_size);

    return 0;
}

/**
 * Read the quantizer indices from s->c and build the dequantization
 * factors for all four segments.
 *
 * A 7-bit base index and five signed 4-bit deltas are read. Per segment the
 * base index is either the frame-level one, or the segment's base_quant
 * (added to the frame-level index unless absolute_vals is set) when
 * segmentation is enabled. Each index is clipped to 7 bits before the
 * table lookup.
 */
static void get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int seg;

    /* The order of these reads is the bitstream order. */
    int yac_qi     = vp8_rac_get_uint(c, 7);
    int ydc_delta  = vp8_rac_get_sint(c, 4);
    int y2dc_delta = vp8_rac_get_sint(c, 4);
    int y2ac_delta = vp8_rac_get_sint(c, 4);
    int uvdc_delta = vp8_rac_get_sint(c, 4);
    int uvac_delta = vp8_rac_get_sint(c, 4);

    for (seg = 0; seg < 4; seg++) {
        int base_qi = yac_qi;

        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[seg];
            if (!s->segmentation.absolute_vals)
                base_qi += yac_qi;
        }

        s->qmat[seg].luma_qmul[0]    = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta, 7)];
        s->qmat[seg].luma_qmul[1]    = vp8_ac_qlookup[av_clip_uintp2(base_qi, 7)];
        s->qmat[seg].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
        /* 101581>>16 is equivalent to 155/100 */
        s->qmat[seg].luma_dc_qmul[1] = (101581 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)]) >> 16;
        s->qmat[seg].chroma_qmul[0]  = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
        s->qmat[seg].chroma_qmul[1]  = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];

        /* Range limits: Y2 AC factor is at least 8, chroma DC at most 132. */
        s->qmat[seg].luma_dc_qmul[1] = FFMAX(s->qmat[seg].luma_dc_qmul[1], 8);
        s->qmat[seg].chroma_qmul[0]  = FFMIN(s->qmat[seg].chroma_qmul[0], 132);
    }
}

/**
 * Determine which buffer golden or altref should be updated with after
 * this frame.
 * The spec isn't clear here, so this follows the original author's
 * understanding of what libvpx does.
 *
 * If the update flag is set, the reference is updated with the current
 * frame and nothing is read from the bitstream.
 * Otherwise a 2-bit code is read, meaning:
 *      0: no update
 *      1: VP56_FRAME_PREVIOUS
 *      2: update golden with altref, or update altref with golden
 * Any other code is treated as "no update".
 *
 * @param s      decoder context (the code is read from s->c)
 * @param update update flag already read for this reference
 * @param ref    the reference being decided (golden or altref)
 */
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
{
    VP56RangeCoder *c = &s->c;
    int code;

    if (update)
        return VP56_FRAME_CURRENT;

    code = vp8_rac_get_uint(c, 2);

    if (code == 1)
        return VP56_FRAME_PREVIOUS;

    if (code == 2) {
        /* copy from the other of the two long-term references */
        if (ref == VP56_FRAME_GOLDEN)
            return VP56_FRAME_GOLDEN2;
        return VP56_FRAME_GOLDEN;
    }

    return VP56_FRAME_NONE;
}

/**
 * Read the golden/altref update flags and resolve the update source for
 * each of them.
 *
 * Both flags are read before either ref_to_update() call, because each
 * call may read further bits from the same range coder.
 */
static void update_refs(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int golden_flag, altref_flag;

    golden_flag = vp8_rac_get(c);
    altref_flag = vp8_rac_get(c);

    s->update_golden = ref_to_update(s, golden_flag, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, altref_flag, VP56_FRAME_GOLDEN2);
}

/**
 * Parse the frame header and set up all per-frame decoder state.
 *
 * The uncompressed part is a 3-byte frame tag (keyframe flag, profile,
 * visibility flag, size of the first partition) followed, on keyframes
 * only, by a 3-byte start code and two 16-bit fields carrying the 14-bit
 * width/height plus 2-bit scaling values. The remainder is read through the
 * range coder s->c, strictly in bitstream order.
 *
 * Keyframes reset the token, pred16x16, pred8x8c and MV probabilities, the
 * segmentation state and the loop-filter deltas before parsing continues.
 *
 * @param s        decoder context
 * @param buf      start of the frame data
 * @param buf_size number of bytes available at buf
 * @return 0 on success, AVERROR_INVALIDDATA on truncated or malformed
 *         input, or the error returned by update_dimensions()
 */
static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, i, j, k, l, m, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    /* The frame tag alone is 3 bytes; do not read it, or modify any
     * decoder state based on it, unless it is really there. */
    if (buf_size < 3) {
        av_log(s->avctx, AV_LOG_ERROR, "Insufficient data (%d) for header\n", buf_size);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe  = !(buf[0] & 1);
    s->profile   =  (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
    else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));

    /* Keyframes carry 7 extra uncompressed bytes before the first partition. */
    if (header_size > buf_size - 7*s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        int hscale, vscale;

        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        width  = AV_RL16(buf+3) & 0x3fff;
        height = AV_RL16(buf+5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        for (i = 0; i < 4; i++)
            for (j = 0; j < 16; j++)
                memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                       sizeof(s->prob->token[i][j]));
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc      , vp8_mv_default_prob     , sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    ff_vp56_init_range_decoder(c, buf, header_size);
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        if (vp8_rac_get(c))
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        vp8_rac_get(c); // whether we can skip clamping in dsp functions
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c)))
        if (vp8_rac_get(c))
            update_lf_deltas(s);

    /* buf now points just past the first partition */
    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height) {
        if ((ret = update_dimensions(s, width, height)) < 0)
            return ret;
    }

    get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    /* Token probability updates are coded per coefficient band (j) and then
     * expanded to every coefficient position belonging to that band. */
    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);

        if (vp8_rac_get(c))
            for (i = 0; i < 4; i++)
                s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
        if (vp8_rac_get(c))
            for (i = 0; i < 3; i++)
                s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);

        // 17.2 MV probability update
        for (i = 0; i < 2; i++)
            for (j = 0; j < 19; j++)
                if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                    s->prob->mvc[i][j] = vp8_rac_get_nn(c);
    }

    return 0;
}

Jason Garrett-Glaser's avatar
Jason Garrett-Glaser committed
442
/**
 * Clip a motion vector to the range [s->mv_min, s->mv_max], component-wise.
 *
 * @param s   decoder context supplying the limits
 * @param dst receives the clipped vector
 * @param src vector to clip
 */
static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
{
    const int clipped_x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
    const int clipped_y = av_clip(src->y, s->mv_min.y, s->mv_max.y);

    dst->x = clipped_x;
    dst->y = clipped_y;
}

/**
 * Motion vector coding, 17.1.
 *
 * Reads one motion vector component. p[0] selects between two magnitude
 * codings:
 *  - long form: bits 0..2 are read first, then bits 9 down to 4, each with
 *    its own probability p[9 + bit]; bit 3 is read last (p[12]) and only
 *    when some bit above it is set - otherwise it is implied to be 1;
 *  - short form: a three-level tree rooted at p[2] yielding 0..7.
 * A sign is read with p[1] only for non-zero magnitudes.
 *
 * @param c range coder to read from
 * @param p probability table for this component
 * @return the signed component value
 */
static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    int mag = 0;

    if (vp56_rac_get_prob_branchy(c, p[0])) {
        const uint8_t *bit_prob = p + 9;
        int b;

        b = 0;
        while (b < 3) {
            mag += vp56_rac_get_prob(c, bit_prob[b]) << b;
            b++;
        }
        b = 9;
        while (b > 3) {
            mag += vp56_rac_get_prob(c, bit_prob[b]) << b;
            b--;
        }
        /* bit 3: implied when no higher bit is set, otherwise coded */
        if (!(mag & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
            mag += 8;
    } else {
        // small_mvtree
        const uint8_t *node = p + 2;
        int hi, mid;

        hi    = vp56_rac_get_prob(c, *node);
        node += 1 + 3 * hi;
        mid   = vp56_rac_get_prob(c, *node);
        node += 1 + mid;
        mag   = 4 * hi + 2 * mid + vp56_rac_get_prob(c, *node);
    }

    if (mag && vp56_rac_get_prob(c, p[1]))
        return -mag;

    return mag;
}

479 480
static av_always_inline
const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
David Conrad's avatar
David Conrad committed
481
{
482 483 484
    if (left == top)
        return vp8_submv_prob[4-!!left];
    if (!top)
David Conrad's avatar
David Conrad committed
485
        return vp8_submv_prob[2];
486
    return vp8_submv_prob[1-!!left];
David Conrad's avatar
David Conrad committed
487 488 489 490
}

/**
 * Split motion vector prediction, 16.4.
491
 * @returns the number of motion vectors parsed (2, 4 or 16)
David Conrad's avatar
David Conrad committed
492
 */
493
static av_always_inline
Daniel Kang's avatar
Daniel Kang committed
494
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int layout)
David Conrad's avatar
David Conrad committed
495
{
496 497
    int part_idx;
    int n, num;
Daniel Kang's avatar
Daniel Kang committed
498
    VP8Macroblock *top_mb;
499 500
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
Daniel Kang's avatar
Daniel Kang committed
501
                  *mbsplits_top,
502
                  *mbsplits_cur, *firstidx;
Daniel Kang's avatar
Daniel Kang committed
503
    VP56mv *top_mv;
504 505
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv  = mb->bmv;
David Conrad's avatar
David Conrad committed
506

Daniel Kang's avatar
Daniel Kang committed
507 508 509 510 511 512 513
    if (!layout) // layout is inlined, s->mb_layout is not
        top_mb = &mb[2];
    else
        top_mb = &mb[-s->mb_width-1];
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
    top_mv = top_mb->bmv;

514 515 516 517 518 519 520 521 522 523 524 525 526 527 528
    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
        } else {
            part_idx = VP8_SPLITMVMODE_8x8;
        }
    } else {
        part_idx = VP8_SPLITMVMODE_4x4;
    }

    num = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx],
    firstidx = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

David Conrad's avatar
David Conrad committed
529
    for (n = 0; n < num; n++) {
530
        int k = firstidx[n];
531
        uint32_t left, above;
532 533
        const uint8_t *submv_prob;

534 535 536 537 538 539 540 541
        if (!(k & 3))
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
        else
            left  = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
        if (k <= 3)
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
        else
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
542 543

        submv_prob = get_submv_prob(left, above);
David Conrad's avatar
David Conrad committed
544

545 546 547 548 549 550 551 552 553 554 555 556
        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
                    mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
                } else {
                    AV_ZERO32(&mb->bmv[n]);
                }
            } else {
                AV_WN32A(&mb->bmv[n], above);
            }
        } else {
557
            AV_WN32A(&mb->bmv[n], left);
David Conrad's avatar
David Conrad committed
558 559
        }
    }
560 561

    return num;
David Conrad's avatar
David Conrad committed
562 563
}

564
static av_always_inline
Daniel Kang's avatar
Daniel Kang committed
565
void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
566
{
Daniel Kang's avatar
Daniel Kang committed
567
    VP8Macroblock *mb_edge[3] = { 0 /* top */,
568
                                  mb - 1 /* left */,
Daniel Kang's avatar
Daniel Kang committed
569
                                  0 /* top-left */ };
570
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
571
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
572 573
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
574
    int8_t *sign_bias = s->sign_bias;
575 576 577 578
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

Daniel Kang's avatar
Daniel Kang committed
579 580 581 582 583 584 585 586 587
    if (!layout) { // layout is inlined (s->mb_layout is not)
        mb_edge[0] = mb + 2;
        mb_edge[2] = mb + 1;
    }
    else {
        mb_edge[0] = mb - s->mb_width-1;
        mb_edge[2] = mb - s->mb_width-2;
    }

588 589
    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
590
    AV_ZERO32(&near_mv[2]);
591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621

    /* Process MB on top, left and top-left */
    #define MV_EDGE_CHECK(n)\
    {\
        VP8Macroblock *edge = mb_edge[n];\
        int edge_ref = edge->ref_frame;\
        if (edge_ref != VP56_FRAME_CURRENT) {\
            uint32_t mv = AV_RN32A(&edge->mv);\
            if (mv) {\
                if (cur_sign_bias != sign_bias[edge_ref]) {\
                    /* SWAR negate of the values in mv. */\
                    mv = ~mv;\
                    mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
                }\
                if (!n || mv != AV_RN32A(&near_mv[idx]))\
                    AV_WN32A(&near_mv[++idx], mv);\
                cnt[idx]      += 1 + (n != 2);\
            } else\
                cnt[CNT_ZERO] += 1 + (n != 2);\
        }\
    }

    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
622
        if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
623 624 625 626 627 628 629 630 631 632 633 634
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t,     cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {

                /* Choose the best mv out of 0,0 and the nearest mv */
Jason Garrett-Glaser's avatar
Jason Garrett-Glaser committed
635
                clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
636 637 638
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
639 640 641

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
Daniel Kang's avatar
Daniel Kang committed
642
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1];
643 644 645 646 647 648
                } else {
                    mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
Jason Garrett-Glaser's avatar
Jason Garrett-Glaser committed
649
                clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
650 651 652
                mb->bmv[0] = mb->mv;
            }
        } else {
Jason Garrett-Glaser's avatar
Jason Garrett-Glaser committed
653
            clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
654 655 656 657 658 659 660 661 662
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}

663
static av_always_inline
664
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
Daniel Kang's avatar
Daniel Kang committed
665
                           int mb_x, int keyframe, int layout)
David Conrad's avatar
David Conrad committed
666
{
667 668
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;

Daniel Kang's avatar
Daniel Kang committed
669 670 671 672
    if (layout == 1) {
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
    }
673
    if (keyframe) {
674
        int x, y;
Daniel Kang's avatar
Daniel Kang committed
675
        uint8_t* top;
676
        uint8_t* const left = s->intra4x4_pred_mode_left;
Daniel Kang's avatar
Daniel Kang committed
677 678 679 680
        if (layout == 1)
            top = mb->intra4x4_pred_mode_top;
        else
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
681 682
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
683 684 685 686 687
                const uint8_t *ctx;
                ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                left[y] = top[x] = *intra4x4;
                intra4x4++;
David Conrad's avatar
David Conrad committed
688 689
            }
        }
690
    } else {
691
        int i;
692 693
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
David Conrad's avatar
David Conrad committed
694 695 696
    }
}

697
static av_always_inline
Daniel Kang's avatar
Daniel Kang committed
698 699
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, uint8_t *ref, int layout)
David Conrad's avatar
David Conrad committed
700 701 702 703
{
    VP56RangeCoder *c = &s->c;

    if (s->segmentation.update_map)
704
        *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
705
    else if (s->segmentation.enabled)
Ronald S. Bultje's avatar
Ronald S. Bultje committed
706
        *segment = ref ? *ref : *segment;
707
    mb->segment = *segment;
David Conrad's avatar
David Conrad committed
708

709
    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
David Conrad's avatar
David Conrad committed
710 711 712 713 714

    if (s->keyframe) {
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
Daniel Kang's avatar
Daniel Kang committed
715
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
716 717
        } else {
            const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
Daniel Kang's avatar
Daniel Kang committed
718 719 720 721 722
            if (s->mb_layout == 1)
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
            else
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A( s->intra4x4_pred_mode_left, modes);
723
        }
David Conrad's avatar
David Conrad committed
724

725
        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
David Conrad's avatar
David Conrad committed
726
        mb->ref_frame = VP56_FRAME_CURRENT;
727
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
David Conrad's avatar
David Conrad committed
728
        // inter MB, 16.2
729 730
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
David Conrad's avatar
David Conrad committed
731 732 733
                VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
Jason Garrett-Glaser's avatar
Jason Garrett-Glaser committed
734
        s->ref_count[mb->ref_frame-1]++;
David Conrad's avatar
David Conrad committed
735 736

        // motion vectors, 16.3
Daniel Kang's avatar
Daniel Kang committed
737
        decode_mvs(s, mb, mb_x, mb_y, layout);
David Conrad's avatar
David Conrad committed
738 739 740 741
    } else {
        // intra MB, 16.1
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

742
        if (mb->mode == MODE_I4x4)
Daniel Kang's avatar
Daniel Kang committed
743
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
David Conrad's avatar
David Conrad committed
744

745
        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
David Conrad's avatar
David Conrad committed
746
        mb->ref_frame = VP56_FRAME_CURRENT;
747
        mb->partitioning = VP8_SPLITMVMODE_NONE;
748
        AV_ZERO32(&mb->bmv[0]);
David Conrad's avatar
David Conrad committed
749 750 751
    }
}

752
#ifndef decode_block_coeffs_internal
David Conrad's avatar
David Conrad committed
753
/**
754
 * @param r arithmetic bitstream reader context
755 756
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
David Conrad's avatar
David Conrad committed
757
 * @param i initial coeff index, 0 unless a separate DC block is coded
758
 * @param qmul array holding the dc/ac dequant factor at position 0/1
David Conrad's avatar
David Conrad committed
759 760 761
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
Diego Biurrun's avatar
Diego Biurrun committed
762
static int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
763
                                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
764
                                        int i, uint8_t *token_prob, int16_t qmul[2])
David Conrad's avatar
David Conrad committed
765
{
766
    VP56RangeCoder c = *r;
767
    goto skip_eob;
768
    do {
769
        int coeff;
770 771
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB
            break;
David Conrad's avatar
David Conrad committed
772

773
skip_eob:
774
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
775
            if (++i == 16)
776
                break; // invalid input; blocks should end with EOB
777
            token_prob = probs[i][0];
778
            goto skip_eob;
779 780
        }

781
        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
782
            coeff = 1;
783
            token_prob = probs[i+1][1];
784
        } else {
785 786
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
787
                if (coeff)
788
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
789 790 791
                coeff += 2;
            } else {
                // DCT_CAT*
792 793 794
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                        coeff  = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
795 796
                    } else {                                    // DCT_CAT2
                        coeff  = 7;
797 798
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
799 800
                    }
                } else {    // DCT_CAT3 and up
801 802
                    int a = vp56_rac_get_prob(&c, token_prob[8]);
                    int b = vp56_rac_get_prob(&c, token_prob[9+a]);
803 804
                    int cat = (a<<1) + b;
                    coeff  = 3 + (8<<cat);
805
                    coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
806 807
                }
            }
808
            token_prob = probs[i+1][2];
809
        }
810
        block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
811
    } while (++i < 16);
812

813
    *r = c;
814
    return i;
David Conrad's avatar
David Conrad committed
815
}
816
#endif
/**
 * @param c arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param zero_nhood the initial prediction context for number of surrounding
 *                   all-zero blocks (only left/top, so 0-2)
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
829
static av_always_inline
Diego Biurrun's avatar
Diego Biurrun committed
830
int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
831
                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
832 833 834 835 836 837 838 839
                        int i, int zero_nhood, int16_t qmul[2])
{
    uint8_t *token_prob = probs[i][zero_nhood];
    if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
        return 0;
    return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
}

static av_always_inline
Daniel Kang's avatar
Daniel Kang committed
841
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Macroblock *mb,
842
                      uint8_t t_nnz[9], uint8_t l_nnz[9])
David Conrad's avatar
David Conrad committed
843 844 845
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
846
    int segment = mb->segment;
847
    int block_dc = 0;
David Conrad's avatar
David Conrad committed
848 849 850 851 852

    if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
Daniel Kang's avatar
Daniel Kang committed
853
        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred,
David Conrad's avatar
David Conrad committed
854 855
                                  s->qmat[segment].luma_dc_qmul);
        l_nnz[8] = t_nnz[8] = !!nnz;
856 857 858 859
        if (nnz) {
            nnz_total += nnz;
            block_dc = 1;
            if (nnz == 1)
Daniel Kang's avatar
Daniel Kang committed
860
                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
861
            else
Daniel Kang's avatar
Daniel Kang committed
862
                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
863
        }
David Conrad's avatar
David Conrad committed
864 865 866 867 868 869 870
        luma_start = 1;
        luma_ctx = 0;
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
Jason Garrett-Glaser's avatar
Jason Garrett-Glaser committed
871
            nnz_pred = l_nnz[y] + t_nnz[x];
Daniel Kang's avatar
Daniel Kang committed
872
            nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start,
Jason Garrett-Glaser's avatar
Jason Garrett-Glaser committed
873
                                      nnz_pred, s->qmat[segment].luma_qmul);
874
            // nnz+block_dc may be one more than the actual last index, but we don't care
Daniel Kang's avatar
Daniel Kang committed
875
            td->non_zero_count_cache[y][x] = nnz + block_dc;
David Conrad's avatar
David Conrad committed
876 877 878 879 880 881 882 883 884 885 886
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
Daniel Kang's avatar
Daniel Kang committed
887
                nnz = decode_block_coeffs(c, td->block[i][(y<<1)+x], s->prob->token[2], 0,
David Conrad's avatar
David Conrad committed
888
                                          nnz_pred, s->qmat[segment].chroma_qmul);
Daniel Kang's avatar
Daniel Kang committed
889
                td->non_zero_count_cache[i][(y<<1)+x] = nnz;
David Conrad's avatar
David Conrad committed
890 891 892 893 894 895 896 897 898 899 900
                t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}

/**
 * Save the bottom pixel row of a macroblock into the top-border buffer.
 *
 * Row 15 of luma (16 bytes) always goes to top_border[0..15]. Unless simple
 * is set, row 7 of each chroma plane (8 bytes each) goes to
 * top_border[16..23] and top_border[24..31].
 *
 * @param top_border destination buffer for this macroblock
 * @param src_y      top-left luma sample of the macroblock
 * @param src_cb     top-left Cb sample of the macroblock
 * @param src_cr     top-left Cr sample of the macroblock
 * @param linesize   luma stride in bytes
 * @param uvlinesize chroma stride in bytes
 * @param simple     non-zero to skip the chroma rows
 */
static av_always_inline
void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                      int linesize, int uvlinesize, int simple)
{
    uint8_t *const last_luma_row = src_y + 15 * linesize;

    AV_COPY128(top_border, last_luma_row);

    // luma only when simple is set
    if (simple)
        return;

    AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
    AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
}

/**
 * Swap or copy the pixel row directly above a macroblock with the saved
 * top-border buffer.
 *
 * All three plane pointers are moved up by one row first. Each XCHG() then
 * works on 8 bytes: with a non-zero flag the two locations are swapped,
 * otherwise AV_COPY64(b, a) is used (presumably copying the border bytes into
 * the frame -- confirm against the AV_COPY64 definition).
 *
 * @param top_border border buffer entry for this macroblock; the entry of the
 *                   macroblock to the left is taken to be 32 bytes before it
 * @param src_y      top-left luma sample of the macroblock
 * @param src_cb     top-left Cb sample of the macroblock
 * @param src_cr     top-left Cr sample of the macroblock
 * @param linesize   luma stride in bytes
 * @param uvlinesize chroma stride in bytes
 * @param mb_x       macroblock column
 * @param mb_y       macroblock row
 * @param mb_width   number of macroblock columns
 * @param simple     non-zero to skip chroma, except on row 0
 * @param xchg       non-zero to swap instead of copy for the top-left and
 *                   first-8-byte positions
 */
static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                    int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
                    int simple, int xchg)
{
    uint8_t *top_border_m1 = top_border-32;     // for TL prediction
    src_y  -=   linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

#define XCHG(a,b,xchg) do {                     \
        if (xchg) AV_SWAP64(b,a);               \
        else      AV_COPY64(b,a);               \
    } while (0)

    XCHG(top_border_m1+8, src_y-8, xchg);
    XCHG(top_border,      src_y,   xchg);
    XCHG(top_border+8,    src_y+8, 1);
    // the last column has no macroblock to its right
    if (mb_x < mb_width-1)
        XCHG(top_border+32, src_y+16, 1);

    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (!simple || !mb_y) {
        XCHG(top_border_m1+16, src_cb-8, xchg);
        XCHG(top_border_m1+24, src_cr-8, xchg);
        XCHG(top_border+16,    src_cb, 1);
        XCHG(top_border+24,    src_cr, 1);
    }
}

static av_always_inline
944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964
int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x) {
        return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
    } else {
        return mb_y ? mode : LEFT_DC_PRED8x8;
    }
}

/**
 * Choose the replacement for an 8x8 prediction mode at a macroblock position.
 *
 * @param mode requested prediction mode, returned unchanged when both
 *             mb_x and mb_y are non-zero
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 * @return mode, HOR_PRED8x8 (row 0, column > 0), VERT_PRED8x8 (column 0,
 *         row > 0) or DC_129_PRED8x8 (row 0, column 0)
 */
static av_always_inline
int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (mb_x && mb_y)
        return mode;
    if (mb_x)
        return HOR_PRED8x8;
    return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
}

static av_always_inline
int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
David Conrad's avatar
David Conrad committed
965 966
{
    if (mode == DC_PRED8x8) {
967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988