/*
 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 */

/**
 * @file h264.c
 * H.264 / AVC / MPEG4 part10 codec.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */

#include "dsputil.h"
#include "avcodec.h"
#include "mpegvideo.h"
32
#include "h264.h"
Michael Niedermayer's avatar
Michael Niedermayer committed
33
#include "h264data.h"
34
#include "h264_parser.h"
Michael Niedermayer's avatar
Michael Niedermayer committed
35 36
#include "golomb.h"

37 38
#include "cabac.h"

39
//#undef NDEBUG
Michael Niedermayer's avatar
Michael Niedermayer committed
40 41 42 43 44 45 46 47 48 49 50
#include <assert.h>

/*
 * File-local VLC tables.
 * NOTE(review): judging by the names these are the tables for the
 * coeff_token, total_zeros and run_before syntax elements (with separate
 * chroma DC variants); their initialisation is not in this part of the
 * file -- confirm against the init code.
 */
static VLC coeff_token_vlc[4];
static VLC chroma_dc_coeff_token_vlc;

static VLC total_zeros_vlc[15];
static VLC chroma_dc_total_zeros_vlc[3];

static VLC run_vlc[6];
static VLC run7_vlc;

51 52
static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
53
static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
54
static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
55

56
/**
 * Pack two 16-bit values into one 32-bit word.
 *
 * Without WORDS_BIGENDIAN, a ends up in the low 16 bits and b in the high
 * 16 bits; with WORDS_BIGENDIAN the two are swapped. In both cases a is the
 * half that sits at the lower address once the word is stored to memory.
 *
 * @param a value for the first (lower-address) half, only the low 16 bits are used
 * @param b value for the second (higher-address) half, only the low 16 bits are used
 * @return the packed 32-bit word
 */
static av_always_inline uint32_t pack16to32(int a, int b){
    /* Shift as uint32_t: left-shifting a negative int is undefined behaviour,
     * and callers may pass negative values. */
#ifdef WORDS_BIGENDIAN
    return (b&0xFFFF) + ((uint32_t)a<<16);
#else
    return (a&0xFFFF) + ((uint32_t)b<<16);
#endif
}

64 65 66 67 68 69 70 71 72
/* Lookup table: ff_rem6[i] == i % 6 for i in 0..51. */
const uint8_t ff_rem6[52]={
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3,
};

/* Lookup table: ff_div6[i] == i / 6 for i in 0..51. */
const uint8_t ff_div6[52]={
    0, 0, 0, 0, 0, 0,
    1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4,
    5, 5, 5, 5, 5, 5,
    6, 6, 6, 6, 6, 6,
    7, 7, 7, 7, 7, 7,
    8, 8, 8, 8,
};


Michael Niedermayer's avatar
Michael Niedermayer committed
73 74
/**
 * fill a rectangle.
Loic Le Loarer's avatar
Loic Le Loarer committed
75 76
 * @param h height of the rectangle, should be a constant
 * @param w width of the rectangle, should be a constant
Michael Niedermayer's avatar
Michael Niedermayer committed
77 78
 * @param size the size of val (1 or 4), should be a constant
 */
79
static av_always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
80
    uint8_t *p= (uint8_t*)vp;
Michael Niedermayer's avatar
Michael Niedermayer committed
81
    assert(size==1 || size==4);
Loren Merritt's avatar
Loren Merritt committed
82
    assert(w<=4);
83

Michael Niedermayer's avatar
Michael Niedermayer committed
84 85
    w      *= size;
    stride *= size;
86

87
    assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
Michael Niedermayer's avatar
Michael Niedermayer committed
88
    assert((stride&(w-1))==0);
Loren Merritt's avatar
Loren Merritt committed
89 90 91 92 93 94
    if(w==2){
        const uint16_t v= size==4 ? val : val*0x0101;
        *(uint16_t*)(p + 0*stride)= v;
        if(h==1) return;
        *(uint16_t*)(p + 1*stride)= v;
        if(h==2) return;
Michael Niedermayer's avatar
Michael Niedermayer committed
95
        *(uint16_t*)(p + 2*stride)=
Loren Merritt's avatar
Loren Merritt committed
96 97 98 99 100 101 102
        *(uint16_t*)(p + 3*stride)= v;
    }else if(w==4){
        const uint32_t v= size==4 ? val : val*0x01010101;
        *(uint32_t*)(p + 0*stride)= v;
        if(h==1) return;
        *(uint32_t*)(p + 1*stride)= v;
        if(h==2) return;
Michael Niedermayer's avatar
Michael Niedermayer committed
103
        *(uint32_t*)(p + 2*stride)=
Loren Merritt's avatar
Loren Merritt committed
104 105 106 107 108 109 110 111 112
        *(uint32_t*)(p + 3*stride)= v;
    }else if(w==8){
    //gcc can't optimize 64bit math on x86_32
#if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
        const uint64_t v= val*0x0100000001ULL;
        *(uint64_t*)(p + 0*stride)= v;
        if(h==1) return;
        *(uint64_t*)(p + 1*stride)= v;
        if(h==2) return;
Michael Niedermayer's avatar
Michael Niedermayer committed
113
        *(uint64_t*)(p + 2*stride)=
Loren Merritt's avatar
Loren Merritt committed
114 115 116
        *(uint64_t*)(p + 3*stride)= v;
    }else if(w==16){
        const uint64_t v= val*0x0100000001ULL;
Michael Niedermayer's avatar
Michael Niedermayer committed
117 118 119
        *(uint64_t*)(p + 0+0*stride)=
        *(uint64_t*)(p + 8+0*stride)=
        *(uint64_t*)(p + 0+1*stride)=
Loren Merritt's avatar
Loren Merritt committed
120 121
        *(uint64_t*)(p + 8+1*stride)= v;
        if(h==2) return;
Michael Niedermayer's avatar
Michael Niedermayer committed
122 123 124
        *(uint64_t*)(p + 0+2*stride)=
        *(uint64_t*)(p + 8+2*stride)=
        *(uint64_t*)(p + 0+3*stride)=
Loren Merritt's avatar
Loren Merritt committed
125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
        *(uint64_t*)(p + 8+3*stride)= v;
#else
        *(uint32_t*)(p + 0+0*stride)=
        *(uint32_t*)(p + 4+0*stride)= val;
        if(h==1) return;
        *(uint32_t*)(p + 0+1*stride)=
        *(uint32_t*)(p + 4+1*stride)= val;
        if(h==2) return;
        *(uint32_t*)(p + 0+2*stride)=
        *(uint32_t*)(p + 4+2*stride)=
        *(uint32_t*)(p + 0+3*stride)=
        *(uint32_t*)(p + 4+3*stride)= val;
    }else if(w==16){
        *(uint32_t*)(p + 0+0*stride)=
        *(uint32_t*)(p + 4+0*stride)=
        *(uint32_t*)(p + 8+0*stride)=
        *(uint32_t*)(p +12+0*stride)=
        *(uint32_t*)(p + 0+1*stride)=
        *(uint32_t*)(p + 4+1*stride)=
        *(uint32_t*)(p + 8+1*stride)=
        *(uint32_t*)(p +12+1*stride)= val;
        if(h==2) return;
        *(uint32_t*)(p + 0+2*stride)=
        *(uint32_t*)(p + 4+2*stride)=
        *(uint32_t*)(p + 8+2*stride)=
        *(uint32_t*)(p +12+2*stride)=
        *(uint32_t*)(p + 0+3*stride)=
        *(uint32_t*)(p + 4+3*stride)=
        *(uint32_t*)(p + 8+3*stride)=
        *(uint32_t*)(p +12+3*stride)= val;
#endif
Michael Niedermayer's avatar
Michael Niedermayer committed
156 157
    }else
        assert(0);
Loren Merritt's avatar
Loren Merritt committed
158
    assert(h==4);
Michael Niedermayer's avatar
Michael Niedermayer committed
159 160
}

Loren Merritt's avatar
Loren Merritt committed
161
/**
 * Fill the neighbour caches of h for the macroblock at (s->mb_x, s->mb_y).
 *
 * Works out the indices and types of the top-left, top, top-right and left
 * neighbours (adjusted for macroblock pairs when FRAME_MBAFF is set), then
 * loads from the picture-wide tables into the per-macroblock caches: intra
 * sample availability masks, intra4x4 prediction modes, non-zero coefficient
 * counts, top/left CBP (CABAC only), motion vectors, reference indices,
 * motion vector differences and direct flags.
 *
 * @param h           decoder context; the caches inside it are written
 * @param mb_type     type flags of the current macroblock
 * @param for_deblock nonzero selects the variant used for deblocking:
 *                    a neighbour then counts as available when its
 *                    slice_table entry is below 255 instead of only when it
 *                    is in the current slice, the top-left/top-right
 *                    neighbours are treated as unavailable, and without
 *                    FRAME_MBAFF the function returns right away when
 *                    slice_num is 1 or the top macroblock is in the same slice
 */
static void fill_caches(H264Context *h, int mb_type, int for_deblock){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
    int topleft_xy, top_xy, topright_xy, left_xy[2];
    int topleft_type, top_type, topright_type, left_type[2];
    int left_block[8];
    int i;

    //FIXME deblocking could skip the intra and nnz parts.
    if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[mb_xy-s->mb_stride]) && !FRAME_MBAFF)
        return;

    //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it

    /* default (non-MBAFF) neighbour positions and left block mapping */
    top_xy     = mb_xy  - s->mb_stride;
    topleft_xy = top_xy - 1;
    topright_xy= top_xy + 1;
    left_xy[1] = left_xy[0] = mb_xy-1;
    left_block[0]= 0;
    left_block[1]= 1;
    left_block[2]= 2;
    left_block[3]= 3;
    left_block[4]= 7;
    left_block[5]= 10;
    left_block[6]= 8;
    left_block[7]= 11;
    if(FRAME_MBAFF){
        /* neighbours are addressed per macroblock pair; which macroblock of
         * a neighbouring pair is used depends on the frame/field coding of
         * the current and the neighbouring pair */
        const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
        const int top_pair_xy      = pair_xy     - s->mb_stride;
        const int topleft_pair_xy  = top_pair_xy - 1;
        const int topright_pair_xy = top_pair_xy + 1;
        const int topleft_mb_frame_flag  = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
        const int top_mb_frame_flag      = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
        const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
        const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
        const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
        const int bottom = (s->mb_y & 1);
        tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
        if (bottom
                ? !curr_mb_frame_flag // bottom macroblock
                : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
                ) {
            top_xy -= s->mb_stride;
        }
        if (bottom
                ? !curr_mb_frame_flag // bottom macroblock
                : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
                ) {
            topleft_xy -= s->mb_stride;
        }
        if (bottom
                ? !curr_mb_frame_flag // bottom macroblock
                : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
                ) {
            topright_xy -= s->mb_stride;
        }
        if (left_mb_frame_flag != curr_mb_frame_flag) {
            left_xy[1] = left_xy[0] = pair_xy - 1;
            if (curr_mb_frame_flag) {
                if (bottom) {
                    left_block[0]= 2;
                    left_block[1]= 2;
                    left_block[2]= 3;
                    left_block[3]= 3;
                    left_block[4]= 8;
                    left_block[5]= 11;
                    left_block[6]= 8;
                    left_block[7]= 11;
                } else {
                    left_block[0]= 0;
                    left_block[1]= 0;
                    left_block[2]= 1;
                    left_block[3]= 1;
                    left_block[4]= 7;
                    left_block[5]= 10;
                    left_block[6]= 7;
                    left_block[7]= 10;
                }
            } else {
                left_xy[1] += s->mb_stride;
                //left_block[0]= 0;
                left_block[1]= 2;
                left_block[2]= 0;
                left_block[3]= 2;
                //left_block[4]= 7;
                left_block[5]= 10;
                left_block[6]= 7;
                left_block[7]= 10;
            }
        }
    }

    h->top_mb_xy = top_xy;
    h->left_mb_xy[0] = left_xy[0];
    h->left_mb_xy[1] = left_xy[1];
    if(for_deblock){
        topleft_type = 0;
        topright_type = 0;
        top_type     = h->slice_table[top_xy     ] < 255 ? s->current_picture.mb_type[top_xy]     : 0;
        left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
        left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;

        if(FRAME_MBAFF && !IS_INTRA(mb_type)){
            /* reload the current macroblock's own nnz bits, motion vectors
             * and reference indices from the picture-wide tables */
            int list;
            int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
            for(i=0; i<16; i++)
                h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
            for(list=0; list<h->list_count; list++){
                if(USES_LIST(mb_type,list)){
                    uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
                    uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
                    int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
                    for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
                        dst[0] = src[0];
                        dst[1] = src[1];
                        dst[2] = src[2];
                        dst[3] = src[3];
                    }
                    *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
                    *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
                    ref += h->b8_stride;
                    *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
                    *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
                }else{
                    fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
                    fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
                }
            }
        }
    }else{
        /* a neighbour is only usable when it belongs to the current slice */
        topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
        top_type     = h->slice_table[top_xy     ] == h->slice_num ? s->current_picture.mb_type[top_xy]     : 0;
        topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
        left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
        left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
    }

    if(IS_INTRA(mb_type)){
        /* start from "everything available" and clear bits for neighbours
         * that are missing, or non-intra with constrained_intra_pred set */
        h->topleft_samples_available=
        h->top_samples_available=
        h->left_samples_available= 0xFFFF;
        h->topright_samples_available= 0xEEEA;

        if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
            h->topleft_samples_available= 0xB3FF;
            h->top_samples_available= 0x33FF;
            h->topright_samples_available= 0x26EA;
        }
        for(i=0; i<2; i++){
            if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
                h->topleft_samples_available&= 0xDF5F;
                h->left_samples_available&= 0x5F5F;
            }
        }

        if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
            h->topleft_samples_available&= 0x7FFF;

        if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
            h->topright_samples_available&= 0xFBFF;

        if(IS_INTRA4x4(mb_type)){
            if(IS_INTRA4x4(top_type)){
                h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
                h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
                h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
                h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
            }else{
                int pred;
                if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
                    pred= -1;
                else{
                    pred= 2;
                }
                h->intra4x4_pred_mode_cache[4+8*0]=
                h->intra4x4_pred_mode_cache[5+8*0]=
                h->intra4x4_pred_mode_cache[6+8*0]=
                h->intra4x4_pred_mode_cache[7+8*0]= pred;
            }
            for(i=0; i<2; i++){
                if(IS_INTRA4x4(left_type[i])){
                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
                }else{
                    int pred;
                    if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
                        pred= -1;
                    else{
                        pred= 2;
                    }
                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
                }
            }
        }
    }


/*
0 . T T. T T T T
1 L . .L . . . .
2 L . .L . . . .
3 . T TL . . . .
4 L . .L . . . .
5 L . .. . . . .
*/
//FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
    if(top_type){
        h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
        h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
        h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
        h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];

        h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
        h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];

        h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
        h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];

    }else{
        h->non_zero_count_cache[4+8*0]=
        h->non_zero_count_cache[5+8*0]=
        h->non_zero_count_cache[6+8*0]=
        h->non_zero_count_cache[7+8*0]=

        h->non_zero_count_cache[1+8*0]=
        h->non_zero_count_cache[2+8*0]=

        h->non_zero_count_cache[1+8*3]=
        h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;

    }

    for (i=0; i<2; i++) {
        if(left_type[i]){
            h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
            h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
            h->non_zero_count_cache[0+8*1 +   8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
            h->non_zero_count_cache[0+8*4 +   8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
        }else{
            h->non_zero_count_cache[3+8*1 + 2*8*i]=
            h->non_zero_count_cache[3+8*2 + 2*8*i]=
            h->non_zero_count_cache[0+8*1 +   8*i]=
            h->non_zero_count_cache[0+8*4 +   8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
        }
    }

    if( h->pps.cabac ) {
        // top_cbp
        if(top_type) {
            h->top_cbp = h->cbp_table[top_xy];
        } else if(IS_INTRA(mb_type)) {
            h->top_cbp = 0x1C0;
        } else {
            h->top_cbp = 0;
        }
        // left_cbp
        if (left_type[0]) {
            h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
        } else if(IS_INTRA(mb_type)) {
            h->left_cbp = 0x1C0;
        } else {
            h->left_cbp = 0;
        }
        if (left_type[0]) {
            h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
        }
        if (left_type[1]) {
            h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
        }
    }

#if 1
    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
        int list;
        for(list=0; list<h->list_count; list++){
            if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
                /*if(!h->mv_cache_clean[list]){
                    memset(h->mv_cache [list],  0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
                    memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
                    h->mv_cache_clean[list]= 1;
                }*/
                continue;
            }
            h->mv_cache_clean[list]= 0;

            if(USES_LIST(top_type, list)){
                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
                h->ref_cache[list][scan8[0] + 0 - 1*8]=
                h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
                h->ref_cache[list][scan8[0] + 2 - 1*8]=
                h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
            }else{
                *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
                /* unsigned multiplier: a byte value of 128 or more times
                 * 0x01010101 does not fit in a signed int */
                *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101u;
            }

            for(i=0; i<2; i++){
                int cache_idx = scan8[0] - 1 + i*2*8;
                if(USES_LIST(left_type[i], list)){
                    const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
                    const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
                    *(uint32_t*)h->mv_cache[list][cache_idx  ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
                    *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
                    h->ref_cache[list][cache_idx  ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
                    h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
                }else{
                    *(uint32_t*)h->mv_cache [list][cache_idx  ]=
                    *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
                    h->ref_cache[list][cache_idx  ]=
                    h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
                }
            }

            if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
                continue;

            if(USES_LIST(topleft_type, list)){
                const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
            }else{
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
                h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
            }

            if(USES_LIST(topright_type, list)){
                const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
            }else{
                *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
                h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
            }

            if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
                continue;

            h->ref_cache[list][scan8[5 ]+1] =
            h->ref_cache[list][scan8[7 ]+1] =
            h->ref_cache[list][scan8[13]+1] =  //FIXME remove past 3 (init somewhere else)
            h->ref_cache[list][scan8[4 ]] =
            h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
            *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
            *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
            *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;

            if( h->pps.cabac ) {
                /* XXX beurk, Load mvd */
                if(USES_LIST(top_type, list)){
                    const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
                }else{
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
                }
                if(USES_LIST(left_type[0], list)){
                    const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
                }else{
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
                }
                if(USES_LIST(left_type[1], list)){
                    const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
                }else{
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
                }
                *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
                *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
                *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
                *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
                *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;

                if(h->slice_type == B_TYPE){
                    fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);

                    if(IS_DIRECT(top_type)){
                        *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
                    }else if(IS_8X8(top_type)){
                        int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
                        h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
                        h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
                    }else{
                        *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
                    }

                    if(IS_DIRECT(left_type[0]))
                        h->direct_cache[scan8[0] - 1 + 0*8]= 1;
                    else if(IS_8X8(left_type[0]))
                        h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
                    else
                        h->direct_cache[scan8[0] - 1 + 0*8]= 0;

                    if(IS_DIRECT(left_type[1]))
                        h->direct_cache[scan8[0] - 1 + 2*8]= 1;
                    else if(IS_8X8(left_type[1]))
                        h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
                    else
                        h->direct_cache[scan8[0] - 1 + 2*8]= 0;
                }
            }

            if(FRAME_MBAFF){
                /* rescale cached neighbour refs and vertical mv/mvd components
                 * when a neighbour's frame/field coding differs from the
                 * current macroblock's */
#define MAP_MVS\
                    MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
                    MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
                    MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
                    MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
                    MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
                    MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
                    MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
                    MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
                    MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
                    MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
                if(MB_FIELD){
#define MAP_F2F(idx, mb_type)\
                    if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
                        h->ref_cache[list][idx] <<= 1;\
                        h->mv_cache[list][idx][1] /= 2;\
                        h->mvd_cache[list][idx][1] /= 2;\
                    }
                    MAP_MVS
#undef MAP_F2F
                }else{
#define MAP_F2F(idx, mb_type)\
                    if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
                        h->ref_cache[list][idx] >>= 1;\
                        h->mv_cache[list][idx][1] <<= 1;\
                        h->mvd_cache[list][idx][1] <<= 1;\
                    }
                    MAP_MVS
#undef MAP_F2F
                }
            }
        }
    }
#endif

    h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
}

/**
 * stores intra4x4 prediction modes of the current macroblock from the cache
 * into h->intra4x4_pred_mode[mb_xy][0..6].
 * The cache is addressed as x + 8*y: entries 0..3 are taken from x=7, y=1..4
 * and entries 4..6 from y=4, x=4..6.
 * NOTE(review): presumably the right column and bottom row of the macroblock,
 * i.e. the values later neighbours need -- confirm against fill_caches().
 */
static inline void write_back_intra_pred_mode(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
    int k;

    /* column x=7, rows 1..4 -> entries 0..3 */
    for(k=0; k<4; k++)
        h->intra4x4_pred_mode[mb_xy][k]= h->intra4x4_pred_mode_cache[7 + 8*(k+1)];

    /* row y=4, columns 4..6 -> entries 4..6 */
    for(k=0; k<3; k++)
        h->intra4x4_pred_mode[mb_xy][4+k]= h->intra4x4_pred_mode_cache[4+k + 8*4];
}

/**
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
 *
 * Only the 4 blocks of the top row (when the top samples are unavailable) and
 * the 4 blocks of the left column (when the left samples are unavailable) are
 * examined. The lookup tables map a requested mode to:
 *   <0  the mode cannot be used without the missing neighbour (error),
 *    0  the mode can be kept as is,
 *   >0  the replacement mode that is written back into the cache.
 *
 * @return 0 on success, -1 if a mode requires samples that are not available
 */
static inline int check_intra4x4_pred_mode(H264Context *h){
    MpegEncContext * const s = &h->s;
    static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
    static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
    int i;

    if(!(h->top_samples_available&0x8000)){
        for(i=0; i<4; i++){
            const int mode= h->intra4x4_pred_mode_cache[scan8[0] + i];
            const int status= top[ mode ];
            if(status<0){
                /* report the requested mode, not the (always negative) table result */
                av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", mode, s->mb_x, s->mb_y);
                return -1;
            } else if(status){
                h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
            }
        }
    }

    if(!(h->left_samples_available&0x8000)){
        for(i=0; i<4; i++){
            const int mode= h->intra4x4_pred_mode_cache[scan8[0] + 8*i];
            const int status= left[ mode ];
            if(status<0){
                /* report the requested mode, not the (always negative) table result */
                av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", mode, s->mb_x, s->mb_y);
                return -1;
            } else if(status){
                h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
            }
        }
    }

    return 0;
} //FIXME cleanup like next

/**
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
 *
 * The mode is first remapped through the top table (if the top samples are
 * missing) and the result is then remapped through the left table (if the
 * left samples are missing); a negative table entry means the mode cannot be
 * used.
 *
 * @param mode the requested prediction mode, values above 6 are rejected
 * @return the mode to use, or -1 if the mode is out of range or needs
 *         samples that are not available
 */
static inline int check_intra_pred_mode(H264Context *h, int mode){
    static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
    static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
    MpegEncContext * const s = &h->s;
    const int top_missing = !(h->top_samples_available &0x8000);
    const int left_missing= !(h->left_samples_available&0x8000);

    /* unsigned compare also rejects negative modes */
    if((unsigned)mode > 6) {
        av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
        return -1;
    }

    if(top_missing){
        mode= top[ mode ];
        if(mode<0){
            av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
            return -1;
        }
    }

    if(left_missing){
        mode= left[ mode ];
        if(mode<0){
            av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
            return -1;
        }
    }

    return mode;
}

/**
 * gets the predicted intra4x4 prediction mode.
 * The prediction is the smaller of the modes cached for the left and the top
 * neighbour of block n; if that minimum is negative, DC_PRED is returned.
 * @param n block index
 */
static inline int pred_intra_mode(H264Context *h, int n){
    const int pos= scan8[n];
    const int left_mode= h->intra4x4_pred_mode_cache[pos - 1];
    const int top_mode = h->intra4x4_pred_mode_cache[pos - 8];
    const int smaller= FFMIN(left_mode, top_mode);

    tprintf(h->s.avctx, "mode:%d %d min:%d\n", left_mode, top_mode, smaller);

    return smaller<0 ? DC_PRED : smaller;
}

/**
 * stores non zero coefficient counts of the current macroblock from the cache
 * into h->non_zero_count[mb_xy].
 * Entries 0..6 are taken from cache column x=7 (rows 1..4) and cache row y=4
 * (columns 4..6); entries 7..9 and 10..12 from the cache cells around
 * (1..2, 1..2) and (1..2, 4..5) respectively (cache index is x + 8*y).
 * NOTE(review): entries 7..12 presumably are the chroma blocks -- confirm
 * against the cache layout used by fill_caches().
 * With FRAME_MBAFF, a 16 bit mask with bit i set if luma block i has a non
 * zero count is additionally stored at entry 14.
 */
static inline void write_back_non_zero_count(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
    int k;

    /* column x=7, rows 1..4 -> entries 0..3 */
    for(k=0; k<4; k++)
        h->non_zero_count[mb_xy][k]= h->non_zero_count_cache[7 + 8*(k+1)];

    /* row y=4, columns 4..6 -> entries 4..6 */
    for(k=0; k<3; k++)
        h->non_zero_count[mb_xy][4+k]= h->non_zero_count_cache[4+k + 8*4];

    h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
    h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
    h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];

    h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
    h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
    h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];

    if(FRAME_MBAFF){
        // store all luma nnzs, for deblocking
        int mask = 0;
        for(k=0; k<16; k++)
            mask |= (!!h->non_zero_count_cache[scan8[k]]) << k;
        *(uint16_t*)&h->non_zero_count[mb_xy][14] = mask;
    }
}

/**
 * gets the predicted number of non zero coefficients.
 * The cached counts of the left and top neighbours are summed; a sum below 64
 * is replaced by its rounded up half. The low 5 bits of the result are
 * returned.
 * NOTE(review): the 64 threshold presumably tells apart the case where both
 * neighbours are available -- confirm against the "unavailable" marker value
 * written by fill_caches().
 * @param n block index
 */
static inline int pred_non_zero_count(H264Context *h, int n){
    const int pos= scan8[n];
    const int nnz_left= h->non_zero_count_cache[pos - 1];
    const int nnz_top = h->non_zero_count_cache[pos - 8];
    int pred= nnz_left + nnz_top;

    if(pred<64)
        pred= (pred+1)>>1;

    tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", nnz_left, nnz_top, n, scan8[n], pred&31);

    return pred&31;
}

766 767
/**
 * fetches the diagonal neighbour (reference index and motion vector) of a
 * partition.
 * Normally this is the cache entry above-right of the partition
 * (i - 8 + part_width); if that one is PART_NOT_AVAILABLE the entry
 * above-left (i - 8 - 1) is used instead.
 * With FRAME_MBAFF, some neighbours are read directly from the current
 * picture and rescaled into the scratch cache entry scan8[0]-2.
 * @param C receives a pointer to the selected motion vector
 * @param i cache index (scan8 value) of the partition
 * @param part_width width of the partition in 4x4 blocks
 * @return the reference index belonging to *C
 */
static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
    const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
    MpegEncContext *s = &h->s;

    /* there is no consistent mapping of mvs to neighboring locations that will
     * make mbaff happy, so we can't move all this logic to fill_caches */
    if(FRAME_MBAFF){
        const uint32_t *mb_type_tab = s->current_picture_ptr->mb_type;
        const int16_t *src_mv;

/* loads the mv of 4x4 block (X4,Y4) of the current picture into the scratch
 * cache entry, applying MV_OP to its vertical component, and returns the
 * matching reference index with REF_OP applied; returns LIST_NOT_USED if the
 * macroblock containing the block neither uses the list nor is 8x8 */
#define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
            const int blk_x = X4, blk_y = Y4;\
            const int nb_type = mb_type_tab[(blk_x>>2)+(blk_y>>2)*s->mb_stride];\
            if(!USES_LIST(nb_type,list) && !IS_8X8(nb_type))\
                return LIST_NOT_USED;\
            src_mv = s->current_picture_ptr->motion_val[list][blk_x + blk_y*h->b_stride];\
            h->mv_cache[list][scan8[0]-2][0] = src_mv[0];\
            h->mv_cache[list][scan8[0]-2][1] = src_mv[1] MV_OP;\
            return s->current_picture_ptr->ref_index[list][(blk_x>>1) + (blk_y>>1)*h->b8_stride] REF_OP;

        /* clear the scratch entry and make it the default result */
        *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
        *C = h->mv_cache[list][scan8[0]-2];

        /* frame macroblock in an odd row, top row partition, available
         * topright neighbour that lies in an interlaced macroblock */
        if(!MB_FIELD
           && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE
           && IS_INTERLACED(mb_type_tab[s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3)])){
            SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
        }

        /* topright missing, partition in the leftmost column: the diagonal
         * neighbour lies in the left macroblock */
        if(topright_ref == PART_NOT_AVAILABLE
           && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
           && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
            const int left_interlaced = !!IS_INTERLACED(mb_type_tab[h->left_mb_xy[0]]);

            if(!MB_FIELD && left_interlaced){
                SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
            }
            if(MB_FIELD && !left_interlaced && i >= scan8[0]+8){
                // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
                SET_DIAG_MV(>>1, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
            }
        }
#undef SET_DIAG_MV
    }

    if(topright_ref == PART_NOT_AVAILABLE){
        tprintf(s->avctx, "topright MV not available\n");

        /* fall back to the above-left neighbour */
        *C= h->mv_cache[list][ i - 8 - 1 ];
        return h->ref_cache[list][ i - 8 - 1 ];
    }

    *C= h->mv_cache[list][ i - 8 + part_width ];
    return topright_ref;
}

Michael Niedermayer's avatar
Michael Niedermayer committed
823 824 825 826 827 828 829 830 831 832 833 834 835
/**
 * gets the predicted MV.
 * The left (A), top (B) and diagonal (C) neighbours are examined: if exactly
 * one of them uses the reference ref its mv is the prediction; if none match
 * and only the left one is available the left mv is used; in every other case
 * the prediction is the component wise median of the 3 mvs.
 * @param n the block index
 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
    const int index8= scan8[n];
    const int left_ref=     h->ref_cache[list][ index8 - 1 ];
    const int top_ref=      h->ref_cache[list][ index8 - 8 ];
    const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
    const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
    const int16_t * C;
    int diagonal_ref, match_count;

    assert(part_width==1 || part_width==2 || part_width==4);

/* mv_cache
  B . . A T T T T
  U . . L . . , .
  U . . L . . . .
  U . . L . . , .
  . . . L . . . .
*/

    diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
    match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
    tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);

    if(match_count == 1){
        /* exactly one neighbour uses the same reference, take its mv */
        const int16_t * const only= left_ref==ref ? A
                                  : top_ref ==ref ? B
                                  :                 C;
        *mx= only[0];
        *my= only[1];
    }else if(match_count == 0
             && top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE
             && left_ref != PART_NOT_AVAILABLE){
        /* only the left neighbour exists */
        *mx= A[0];
        *my= A[1];
    }else{
        /* most common */
        *mx= mid_pred(A[0], B[0], C[0]);
        *my= mid_pred(A[1], B[1], C[1]);
    }

    tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
}

/**
 * gets the directionally predicted 16x8 MV.
 * For n==0 the top neighbour of block 0 is tried, otherwise the left
 * neighbour of block 8; if that neighbour does not use the reference ref the
 * generic pred_motion() is used.
 * @param n the block index
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
    /* cache position of the preferred neighbour */
    const int nb_idx= n==0 ? scan8[0] - 8 : scan8[8] - 1;
    const int nb_ref=         h->ref_cache[list][ nb_idx ];
    const int16_t * const nb_mv= h->mv_cache[list][ nb_idx ];

    tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", nb_ref, nb_mv[0], nb_mv[1], h->s.mb_x, h->s.mb_y, n, list);

    if(nb_ref == ref){
        *mx= nb_mv[0];
        *my= nb_mv[1];
        return;
    }

    //RARE
    pred_motion(h, n, 4, list, ref, mx, my);
}

/**
 * gets the directionally predicted 8x16 MV.
 * For n==0 the left neighbour of block 0 is tried, otherwise the diagonal
 * neighbour of block 4; if that neighbour does not use the reference ref the
 * generic pred_motion() is used.
 * @param n the block index
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
    const int16_t *cand_mv;
    int cand_ref;

    if(n==0){
        cand_ref= h->ref_cache[list][ scan8[0] - 1 ];
        cand_mv =  h->mv_cache[list][ scan8[0] - 1 ];
    }else{
        cand_ref= fetch_diagonal_mv(h, &cand_mv, scan8[4], list, 2);
    }

    tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", cand_ref, cand_mv[0], cand_mv[1], h->s.mb_x, h->s.mb_y, n, list);

    if(cand_ref == ref){
        *mx= cand_mv[0];
        *my= cand_mv[1];
        return;
    }

    //RARE
    pred_motion(h, n, 2, list, ref, mx, my);
}

/**
 * gets the MV of a skipped macroblock from the list 0 neighbours.
 * The MV is zero if the top or left neighbour is not available, or if either
 * of them uses reference 0 with a zero mv; otherwise it is the 16x16
 * prediction for reference 0 from pred_motion().
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
    const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
    const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];

    tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);

    if(top_ref != PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE
       && !(top_ref == 0  && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
       && !(left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
        pred_motion(h, 0, 4, 0, 0, mx, my);
        return;
    }

    *my = 0;
    *mx = 0;
}

970 971 972 973 974 975
/**
 * computes h->dist_scale_factor[] for every list 0 reference.
 * With td = clip(poc1 - poc0) and tb = clip(poc - poc0), both clipped to
 * -128..127, where poc is the current picture, poc1 the first list 1
 * reference and poc0 the list 0 reference:
 *   tx     = (16384 + |td|/2) / td
 *   factor = clip((tb*tx + 32) >> 6, -1024, 1023)
 * and factor = 256 if td is 0.
 * With FRAME_MBAFF each factor is duplicated into entries 2*i and 2*i+1 of
 * h->dist_scale_factor_field[].
 */
static inline void direct_dist_scale_factor(H264Context * const h){
    const int poc = h->s.current_picture_ptr->poc;
    const int poc1 = h->ref_list[1][0].poc;
    int i;

    for(i=0; i<h->ref_count[0]; i++){
        const int poc0 = h->ref_list[0][i].poc;
        const int td = av_clip(poc1 - poc0, -128, 127);
        int tb, tx;

        if(td == 0 /* FIXME || pic0 is a long-term ref */){
            h->dist_scale_factor[i] = 256;
            continue;
        }

        tb = av_clip(poc - poc0, -128, 127);
        tx = (16384 + (FFABS(td) >> 1)) / td;
        h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
    }

    if(!FRAME_MBAFF)
        return;

    for(i=0; i<h->ref_count[0]; i++){
        h->dist_scale_factor_field[2*i] =
        h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
    }
}
992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010
/**
 * records the reference lists of the current picture and, for temporal
 * direct prediction, builds the map from the references of the first list 1
 * picture to indices of the current lists.
 * The ref counts and ref POCs of both lists are always copied into the
 * current picture. For B pictures without spatial direct prediction,
 * h->map_col_to_list0[list][i] is set to the index of the first entry of the
 * current list whose POC equals the i-th ref POC stored in ref_list[1][0]
 * (0 if there is none). With FRAME_MBAFF, entry i of that map expands to
 * entries 2*i and 2*i+1 of h->map_col_to_list0_field[list], holding 2*j and
 * 2*j+1.
 */
static inline void direct_ref_list_init(H264Context * const h){
    MpegEncContext * const s = &h->s;
    Picture * const ref1 = &h->ref_list[1][0];
    Picture * const cur = s->current_picture_ptr;
    int list, i, j;

    /* NOTE(review): both stores below are overwritten by the copy loop that
     * follows -- confirm whether the zeroing was meant to happen after it */
    if(cur->pict_type == I_TYPE)
        cur->ref_count[0] = 0;
    if(cur->pict_type != B_TYPE)
        cur->ref_count[1] = 0;

    for(list=0; list<2; list++){
        cur->ref_count[list] = h->ref_count[list];
        for(j=0; j<h->ref_count[list]; j++)
            cur->ref_poc[list][j] = h->ref_list[list][j].poc;
    }

    /* the maps are only built for temporal direct prediction in B pictures */
    if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
        return;

    for(list=0; list<2; list++){
        for(i=0; i<ref1->ref_count[list]; i++){
            const int poc = ref1->ref_poc[list][i];

            /* find the first reference with the same poc */
            for(j=0; j<h->ref_count[list]; j++)
                if(h->ref_list[list][j].poc == poc)
                    break;

            /* 0 is bogus; fills in for missing frames */
            h->map_col_to_list0[list][i] = j<h->ref_count[list] ? j : 0;
        }
    }

    if(!FRAME_MBAFF)
        return;

    for(list=0; list<2; list++){
        for(i=0; i<ref1->ref_count[list]; i++){
            j = h->map_col_to_list0[list][i];
            h->map_col_to_list0_field[list][2*i] = 2*j;
            h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
        }
    }
}
1029 1030 1031 1032 1033 1034 1035 1036

static inline void pred_direct_motion(H264Context * const h, int *mb_type){
    MpegEncContext * const s = &h->s;
    const int mb_xy =   s->mb_x +   s->mb_y*s->mb_stride;
    const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
    const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
    const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
    const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
1037
    const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
1038
    const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
1039
    const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
1040
    const int is_b8x8 = IS_8X8(*mb_type);
1041
    unsigned int sub_mb_type;
1042 1043
    int i8, i4;

1044
#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
1045 1046 1047 1048
    if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
        /* FIXME save sub mb types from previous frames (or derive from MVs)
         * so we know exactly what block size to use */
        sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1049
        *mb_type =    MB_TYPE_8x8|MB_TYPE_L0L1;
1050
    }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
1051 1052 1053 1054
        sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
        *mb_type =    MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
    }else{
        sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1055
        *mb_type =    MB_TYPE_8x8|MB_TYPE_L0L1;
1056 1057 1058
    }
    if(!is_b8x8)
        *mb_type |= MB_TYPE_DIRECT2;
1059 1060
    if(MB_FIELD)
        *mb_type |= MB_TYPE_INTERLACED;
1061

Michel Bardiaux's avatar
Michel Bardiaux committed
1062
    tprintf(s->avctx, "mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
1063

1064 1065 1066 1067 1068
    if(h->direct_spatial_mv_pred){
        int ref[2];
        int mv[2][2];
        int list;

1069 1070
        /* FIXME interlacing + spatial direct uses wrong colocated block positions */

1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108
        /* ref = min(neighbors) */
        for(list=0; list<2; list++){
            int refa = h->ref_cache[list][scan8[0] - 1];
            int refb = h->ref_cache[list][scan8[0] - 8];
            int refc = h->ref_cache[list][scan8[0] - 8 + 4];
            if(refc == -2)
                refc = h->ref_cache[list][scan8[0] - 8 - 1];
            ref[list] = refa;
            if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
                ref[list] = refb;
            if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
                ref[list] = refc;
            if(ref[list] < 0)
                ref[list] = -1;
        }

        if(ref[0] < 0 && ref[1] < 0){
            ref[0] = ref[1] = 0;
            mv[0][0] = mv[0][1] =
            mv[1][0] = mv[1][1] = 0;
        }else{
            for(list=0; list<2; list++){
                if(ref[list] >= 0)
                    pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
                else
                    mv[list][0] = mv[list][1] = 0;
            }
        }

        if(ref[1] < 0){
            *mb_type &= ~MB_TYPE_P0L1;
            sub_mb_type &= ~MB_TYPE_P0L1;
        }else if(ref[0] < 0){
            *mb_type &= ~MB_TYPE_P0L0;
            sub_mb_type &= ~MB_TYPE_P0L0;
        }

        if(IS_16X16(*mb_type)){
1109 1110
            int a=0, b=0;

1111 1112
            fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
            fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1113
            if(!IS_INTRA(mb_type_col)
Diego Biurrun's avatar
Diego Biurrun committed
1114 1115
               && (   (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
                   || (l1ref0[0]  < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1116
                       && (h->x264_build>33 || !h->x264_build)))){
1117
                if(ref[0] > 0)
1118
                    a= pack16to32(mv[0][0],mv[0][1]);
1119
                if(ref[1] > 0)
1120
                    b= pack16to32(mv[1][0],mv[1][1]);
1121
            }else{
1122 1123
                a= pack16to32(mv[0][0],mv[0][1]);
                b= pack16to32(mv[1][0],mv[1][1]);
1124
            }
1125 1126
            fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
            fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
1127 1128 1129 1130
        }else{
            for(i8=0; i8<4; i8++){
                const int x8 = i8&1;
                const int y8 = i8>>1;
1131

1132 1133 1134
                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                    continue;
                h->sub_mb_type[i8] = sub_mb_type;
1135

1136 1137
                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1138 1139
                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1140

1141
                /* col_zero_flag */
1142 1143
                if(!IS_INTRA(mb_type_col) && (   l1ref0[x8 + y8*h->b8_stride] == 0
                                              || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1144
                                                  && (h->x264_build>33 || !h->x264_build)))){
1145
                    const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1146 1147
                    if(IS_SUB_8X8(sub_mb_type)){
                        const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
Diego Biurrun's avatar
Diego Biurrun committed
1148
                        if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1149 1150 1151 1152 1153 1154
                            if(ref[0] == 0)
                                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
                            if(ref[1] == 0)
                                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
                        }
                    }else
1155
                    for(i4=0; i4<4; i4++){
1156
                        const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
Diego Biurrun's avatar
Diego Biurrun committed
1157
                        if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1158 1159 1160 1161 1162 1163 1164 1165 1166 1167
                            if(ref[0] == 0)
                                *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
                            if(ref[1] == 0)
                                *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
                        }
                    }
                }
            }
        }
    }else{ /* direct temporal mv pred */
1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208