h264.c 289 KB
Newer Older
Michael Niedermayer's avatar
Michael Niedermayer committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
/*
 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */
 
/**
 * @file h264.c
 * H.264 / AVC / MPEG4 part10 codec.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */

#include "common.h"
#include "dsputil.h"
#include "avcodec.h"
#include "mpegvideo.h"
#include "h264data.h"
#include "golomb.h"

34 35
#include "cabac.h"

Michael Niedermayer's avatar
Michael Niedermayer committed
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
#undef NDEBUG
#include <assert.h>

#define interlaced_dct interlaced_dct_is_a_bad_name
#define mb_intra mb_intra_isnt_initalized_see_mb_type

#define LUMA_DC_BLOCK_INDEX   25
#define CHROMA_DC_BLOCK_INDEX 26

#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
#define COEFF_TOKEN_VLC_BITS           8
#define TOTAL_ZEROS_VLC_BITS           9
#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
#define RUN_VLC_BITS                   3
#define RUN7_VLC_BITS                  6

#define MAX_SPS_COUNT 32
#define MAX_PPS_COUNT 256

#define MAX_MMCO_COUNT 66

/**
 * Sequence parameter set
 */
typedef struct SPS{
    
    int profile_idc;
    int level_idc;
Loren Merritt's avatar
Loren Merritt committed
64
    int transform_bypass;              ///< qpprime_y_zero_transform_bypass_flag
Michael Niedermayer's avatar
Michael Niedermayer committed
65 66 67 68 69 70 71 72
    int log2_max_frame_num;            ///< log2_max_frame_num_minus4 + 4
    int poc_type;                      ///< pic_order_cnt_type
    int log2_max_poc_lsb;              ///< log2_max_pic_order_cnt_lsb_minus4
    int delta_pic_order_always_zero_flag;
    int offset_for_non_ref_pic;
    int offset_for_top_to_bottom_field;
    int poc_cycle_length;              ///< num_ref_frames_in_pic_order_cnt_cycle
    int ref_frame_count;               ///< num_ref_frames
73
    int gaps_in_frame_num_allowed_flag;
Michael Niedermayer's avatar
Michael Niedermayer committed
74 75 76 77 78
    int mb_width;                      ///< frame_width_in_mbs_minus1 + 1
    int mb_height;                     ///< frame_height_in_mbs_minus1 + 1
    int frame_mbs_only_flag;
    int mb_aff;                        ///<mb_adaptive_frame_field_flag
    int direct_8x8_inference_flag;
79 80 81 82 83
    int crop;                   ///< frame_cropping_flag
    int crop_left;              ///< frame_cropping_rect_left_offset
    int crop_right;             ///< frame_cropping_rect_right_offset
    int crop_top;               ///< frame_cropping_rect_top_offset
    int crop_bottom;            ///< frame_cropping_rect_bottom_offset
Michael Niedermayer's avatar
Michael Niedermayer committed
84
    int vui_parameters_present_flag;
Michael Niedermayer's avatar
Michael Niedermayer committed
85
    AVRational sar;
86 87 88 89
    int timing_info_present_flag;
    uint32_t num_units_in_tick;
    uint32_t time_scale;
    int fixed_frame_rate_flag;
Michael Niedermayer's avatar
Michael Niedermayer committed
90
    short offset_for_ref_frame[256]; //FIXME dyn aloc?
91 92
    int bitstream_restriction_flag;
    int num_reorder_frames;
Michael Niedermayer's avatar
Michael Niedermayer committed
93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
}SPS;

/**
 * Picture parameter set
 */
typedef struct PPS{
    int sps_id;
    int cabac;                  ///< entropy_coding_mode_flag
    int pic_order_present;      ///< pic_order_present_flag
    int slice_group_count;      ///< num_slice_groups_minus1 + 1
    int mb_slice_group_map_type;
    int ref_count[2];           ///< num_ref_idx_l0/1_active_minus1 + 1
    int weighted_pred;          ///< weighted_pred_flag
    int weighted_bipred_idc;
    int init_qp;                ///< pic_init_qp_minus26 + 26
    int init_qs;                ///< pic_init_qs_minus26 + 26
    int chroma_qp_index_offset;
    int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
    int constrained_intra_pred; ///< constrained_intra_pred_flag
    int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
113
    int transform_8x8_mode;     ///< transform_8x8_mode_flag
Michael Niedermayer's avatar
Michael Niedermayer committed
114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157
}PPS;

/**
 * Memory management control operation opcode.
 */
typedef enum MMCOOpcode{
    MMCO_END=0,
    MMCO_SHORT2UNUSED,
    MMCO_LONG2UNUSED,
    MMCO_SHORT2LONG,
    MMCO_SET_MAX_LONG,
    MMCO_RESET, 
    MMCO_LONG,
} MMCOOpcode;

/**
 * Memory management control operation.
 */
typedef struct MMCO{
    MMCOOpcode opcode;
    int short_frame_num;
    int long_index;
} MMCO;

/**
 * H264Context
 */
typedef struct H264Context{
    MpegEncContext s;
    int nal_ref_idc;	
    int nal_unit_type;
#define NAL_SLICE		1
#define NAL_DPA			2
#define NAL_DPB			3
#define NAL_DPC			4
#define NAL_IDR_SLICE		5
#define NAL_SEI			6
#define NAL_SPS			7
#define NAL_PPS			8
#define NAL_PICTURE_DELIMITER	9
#define NAL_FILTER_DATA		10
    uint8_t *rbsp_buffer;
    int rbsp_buffer_size;

158 159 160 161 162 163 164
    /**
      * Used to parse AVC variant of h264
      */
    int is_avc; ///< this flag is != 0 if codec is avc1
    int got_avcC; ///< flag used to parse avcC data only once
    int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)

Michael Niedermayer's avatar
Michael Niedermayer committed
165 166
    int chroma_qp; //QPc

167
    int prev_mb_skipped; //FIXME remove (IMHO not used)
Michael Niedermayer's avatar
Michael Niedermayer committed
168 169 170 171

    //prediction stuff
    int chroma_pred_mode;
    int intra16x16_pred_mode;
172 173 174

    int top_mb_xy;
    int left_mb_xy[2];
Michael Niedermayer's avatar
Michael Niedermayer committed
175 176 177 178
    
    int8_t intra4x4_pred_mode_cache[5*8];
    int8_t (*intra4x4_pred_mode)[8];
    void (*pred4x4  [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
179
    void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride);
Michael Niedermayer's avatar
Michael Niedermayer committed
180 181 182 183 184 185
    void (*pred8x8  [4+3])(uint8_t *src, int stride);
    void (*pred16x16[4+3])(uint8_t *src, int stride);
    unsigned int topleft_samples_available;
    unsigned int top_samples_available;
    unsigned int topright_samples_available;
    unsigned int left_samples_available;
186 187
    uint8_t (*top_borders[2])[16+2*8];
    uint8_t left_border[2*(17+2*9)];
Michael Niedermayer's avatar
Michael Niedermayer committed
188 189 190 191 192

    /**
     * non zero coeff count cache.
     * is 64 if not available.
     */
193
    uint8_t non_zero_count_cache[6*8] __align8;
194
    uint8_t (*non_zero_count)[16];
Michael Niedermayer's avatar
Michael Niedermayer committed
195 196 197 198

    /**
     * Motion vector cache.
     */
199 200
    int16_t mv_cache[2][5*8][2] __align8;
    int8_t ref_cache[2][5*8] __align8;
Michael Niedermayer's avatar
Michael Niedermayer committed
201 202 203 204 205 206 207 208
#define LIST_NOT_USED -1 //FIXME rename?
#define PART_NOT_AVAILABLE -2
    
    /**
     * is 1 if the specific list MV&references are set to 0,0,-2.
     */
    int mv_cache_clean[2];

209 210 211 212 213
    /**
     * number of neighbors (top and/or left) that used 8x8 dct
     */
    int neighbor_transform_size;

214 215 216 217 218
    /**
     * block_offset[ 0..23] for frame macroblocks
     * block_offset[24..47] for field macroblocks
     */
    int block_offset[2*(16+8)];
Michael Niedermayer's avatar
Michael Niedermayer committed
219
    
220 221
    uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
    uint32_t *mb2b8_xy;
222
    int b_stride; //FIXME use s->b4_stride
Michael Niedermayer's avatar
Michael Niedermayer committed
223 224
    int b8_stride;

225 226 227
    int halfpel_flag;
    int thirdpel_flag;

228 229 230
    int unknown_svq3_flag;
    int next_slice_index;

Michael Niedermayer's avatar
Michael Niedermayer committed
231 232 233 234 235 236 237
    SPS sps_buffer[MAX_SPS_COUNT];
    SPS sps; ///< current sps
    
    PPS pps_buffer[MAX_PPS_COUNT];
    /**
     * current pps
     */
238
    PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
Michael Niedermayer's avatar
Michael Niedermayer committed
239

Loren Merritt's avatar
Loren Merritt committed
240
    uint16_t (*dequant4_coeff)[16]; // FIXME quant matrices should be per SPS or PPS
241 242
    uint16_t (*dequant8_coeff)[64];

Michael Niedermayer's avatar
Michael Niedermayer committed
243 244 245 246 247 248 249
    int slice_num;
    uint8_t *slice_table_base;
    uint8_t *slice_table;      ///< slice_table_base + mb_stride + 1
    int slice_type;
    int slice_type_fixed;
    
    //interlacing specific flags
250
    int mb_aff_frame;
Michael Niedermayer's avatar
Michael Niedermayer committed
251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277
    int mb_field_decoding_flag;
    
    int sub_mb_type[4];
    
    //POC stuff
    int poc_lsb;
    int poc_msb;
    int delta_poc_bottom;
    int delta_poc[2];
    int frame_num;
    int prev_poc_msb;             ///< poc_msb of the last reference pic for POC type 0
    int prev_poc_lsb;             ///< poc_lsb of the last reference pic for POC type 0
    int frame_num_offset;         ///< for POC type 2
    int prev_frame_num_offset;    ///< for POC type 2
    int prev_frame_num;           ///< frame_num of the last pic for POC type 1/2

    /**
     * frame_num for frames or 2*frame_num for field pics.
     */
    int curr_pic_num;
    
    /**
     * max_frame_num or 2*max_frame_num for field pics.
     */
    int max_pic_num;

    //Weighted pred stuff
Loren Merritt's avatar
Loren Merritt committed
278 279
    int use_weight;
    int use_weight_chroma;
Michael Niedermayer's avatar
Michael Niedermayer committed
280 281 282 283 284 285
    int luma_log2_weight_denom;
    int chroma_log2_weight_denom;
    int luma_weight[2][16];
    int luma_offset[2][16];
    int chroma_weight[2][16][2];
    int chroma_offset[2][16][2];
Loren Merritt's avatar
Loren Merritt committed
286
    int implicit_weight[16][16];
Michael Niedermayer's avatar
Michael Niedermayer committed
287 288
   
    //deblock
289
    int deblocking_filter;         ///< disable_deblocking_filter_idc with 1<->0 
290 291
    int slice_alpha_c0_offset;
    int slice_beta_offset;
Michael Niedermayer's avatar
Michael Niedermayer committed
292 293 294 295
     
    int redundant_pic_count;
    
    int direct_spatial_mv_pred;
296
    int dist_scale_factor[16];
297
    int map_col_to_list0[2][16];
Michael Niedermayer's avatar
Michael Niedermayer committed
298 299 300 301 302

    /**
     * num_ref_idx_l0/1_active_minus1 + 1
     */
    int ref_count[2];// FIXME split for AFF
Loren Merritt's avatar
Loren Merritt committed
303 304
    Picture *short_ref[32];
    Picture *long_ref[32];
Michael Niedermayer's avatar
Michael Niedermayer committed
305 306 307
    Picture default_ref_list[2][32];
    Picture ref_list[2][32]; //FIXME size?
    Picture field_ref_list[2][32]; //FIXME size?
308
    Picture *delayed_pic[16]; //FIXME size?
309
    Picture *delayed_output_pic;
Michael Niedermayer's avatar
Michael Niedermayer committed
310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326
    
    /**
     * memory management control operations buffer.
     */
    MMCO mmco[MAX_MMCO_COUNT];
    int mmco_index;
    
    int long_ref_count;  ///< number of actual long term references
    int short_ref_count; ///< number of actual short term references
    
    //data partitioning
    GetBitContext intra_gb;
    GetBitContext inter_gb;
    GetBitContext *intra_gb_ptr;
    GetBitContext *inter_gb_ptr;
    
    DCTELEM mb[16*24] __align8;
327 328 329 330 331

    /**
     * Cabac
     */
    CABACContext cabac;
332
    uint8_t      cabac_state[460];
333 334 335 336
    int          cabac_init_idc;

    /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
    uint16_t     *cbp_table;
Michael Niedermayer's avatar
Michael Niedermayer committed
337 338
    int top_cbp;
    int left_cbp;
339
    /* chroma_pred_mode for i4x4 or i16x16, else 0 */
340 341
    uint8_t     *chroma_pred_mode_table;
    int         last_qscale_diff;
342
    int16_t     (*mvd_table[2])[2];
343
    int16_t     mvd_cache[2][5*8][2] __align8;
344 345
    uint8_t     *direct_table;
    uint8_t     direct_cache[5*8];
346

347 348
    uint8_t zigzag_scan[16];
    uint8_t field_scan[16];
Loren Merritt's avatar
Loren Merritt committed
349 350
    const uint8_t *zigzag_scan_q0;
    const uint8_t *field_scan_q0;
Michael Niedermayer's avatar
Michael Niedermayer committed
351 352 353 354 355 356 357 358 359 360 361
}H264Context;

static VLC coeff_token_vlc[4];
static VLC chroma_dc_coeff_token_vlc;

static VLC total_zeros_vlc[15];
static VLC chroma_dc_total_zeros_vlc[3];

static VLC run_vlc[6];
static VLC run7_vlc;

362 363
static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
364
static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
365

Michael Niedermayer's avatar
Michael Niedermayer committed
366 367 368 369 370 371 372 373
static inline uint32_t pack16to32(int a, int b){
#ifdef WORDS_BIGENDIAN
   return (b&0xFFFF) + (a<<16);
#else
   return (a&0xFFFF) + (b<<16);
#endif
}

Michael Niedermayer's avatar
Michael Niedermayer committed
374 375
/**
 * fill a rectangle.
Loic Le Loarer's avatar
Loic Le Loarer committed
376 377
 * @param h height of the rectangle, should be a constant
 * @param w width of the rectangle, should be a constant
Michael Niedermayer's avatar
Michael Niedermayer committed
378 379
 * @param size the size of val (1 or 4), should be a constant
 */
380 381
static inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){ //FIXME ensure this IS inlined
    uint8_t *p= (uint8_t*)vp;
Michael Niedermayer's avatar
Michael Niedermayer committed
382 383 384 385 386
    assert(size==1 || size==4);
    
    w      *= size;
    stride *= size;
    
387
    assert((((int)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
Michael Niedermayer's avatar
Michael Niedermayer committed
388
    assert((stride&(w-1))==0);
389
//FIXME check what gcc generates for 64 bit on x86 and possibly write a 32 bit ver of it
Michael Niedermayer's avatar
Michael Niedermayer committed
390 391 392 393 394 395 396 397
    if(w==2 && h==2){
        *(uint16_t*)(p + 0)=
        *(uint16_t*)(p + stride)= size==4 ? val : val*0x0101;
    }else if(w==2 && h==4){
        *(uint16_t*)(p + 0*stride)=
        *(uint16_t*)(p + 1*stride)=
        *(uint16_t*)(p + 2*stride)=
        *(uint16_t*)(p + 3*stride)= size==4 ? val : val*0x0101;
Michael Niedermayer's avatar
Michael Niedermayer committed
398 399
    }else if(w==4 && h==1){
        *(uint32_t*)(p + 0*stride)= size==4 ? val : val*0x01010101;
Michael Niedermayer's avatar
Michael Niedermayer committed
400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438
    }else if(w==4 && h==2){
        *(uint32_t*)(p + 0*stride)=
        *(uint32_t*)(p + 1*stride)= size==4 ? val : val*0x01010101;
    }else if(w==4 && h==4){
        *(uint32_t*)(p + 0*stride)=
        *(uint32_t*)(p + 1*stride)=
        *(uint32_t*)(p + 2*stride)=
        *(uint32_t*)(p + 3*stride)= size==4 ? val : val*0x01010101;
    }else if(w==8 && h==1){
        *(uint32_t*)(p + 0)=
        *(uint32_t*)(p + 4)= size==4 ? val : val*0x01010101;
    }else if(w==8 && h==2){
        *(uint32_t*)(p + 0 + 0*stride)=
        *(uint32_t*)(p + 4 + 0*stride)=
        *(uint32_t*)(p + 0 + 1*stride)=
        *(uint32_t*)(p + 4 + 1*stride)=  size==4 ? val : val*0x01010101;
    }else if(w==8 && h==4){
        *(uint64_t*)(p + 0*stride)=
        *(uint64_t*)(p + 1*stride)=
        *(uint64_t*)(p + 2*stride)=
        *(uint64_t*)(p + 3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
    }else if(w==16 && h==2){
        *(uint64_t*)(p + 0+0*stride)=
        *(uint64_t*)(p + 8+0*stride)=
        *(uint64_t*)(p + 0+1*stride)=
        *(uint64_t*)(p + 8+1*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
    }else if(w==16 && h==4){
        *(uint64_t*)(p + 0+0*stride)=
        *(uint64_t*)(p + 8+0*stride)=
        *(uint64_t*)(p + 0+1*stride)=
        *(uint64_t*)(p + 8+1*stride)=
        *(uint64_t*)(p + 0+2*stride)=
        *(uint64_t*)(p + 8+2*stride)=
        *(uint64_t*)(p + 0+3*stride)=
        *(uint64_t*)(p + 8+3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
    }else
        assert(0);
}

Michael Niedermayer's avatar
Michael Niedermayer committed
439
static inline void fill_caches(H264Context *h, int mb_type, int for_deblock){
Michael Niedermayer's avatar
Michael Niedermayer committed
440
    MpegEncContext * const s = &h->s;
441
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
Michael Niedermayer's avatar
Michael Niedermayer committed
442 443
    int topleft_xy, top_xy, topright_xy, left_xy[2];
    int topleft_type, top_type, topright_type, left_type[2];
444
    int left_block[8];
Michael Niedermayer's avatar
Michael Niedermayer committed
445 446
    int i;

447 448 449 450 451 452
    //FIXME deblocking can skip fill_caches much of the time with multiple slices too.
    // the actual condition is whether we're on the edge of a slice,
    // and even then the intra and nnz parts are unnecessary.
    if(for_deblock && h->slice_num == 1)
        return;

453
    //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it 
Michael Niedermayer's avatar
Michael Niedermayer committed
454
    
455 456 457 458 459 460 461 462 463 464 465 466
    top_xy     = mb_xy  - s->mb_stride;
    topleft_xy = top_xy - 1;
    topright_xy= top_xy + 1;
    left_xy[1] = left_xy[0] = mb_xy-1;
    left_block[0]= 0;
    left_block[1]= 1;
    left_block[2]= 2;
    left_block[3]= 3;
    left_block[4]= 7;
    left_block[5]= 10;
    left_block[6]= 8;
    left_block[7]= 11;
467
    if(h->mb_aff_frame){
468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530
        const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
        const int top_pair_xy      = pair_xy     - s->mb_stride;
        const int topleft_pair_xy  = top_pair_xy - 1;
        const int topright_pair_xy = top_pair_xy + 1;
        const int topleft_mb_frame_flag  = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
        const int top_mb_frame_flag      = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
        const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
        const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
        const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
        const int bottom = (s->mb_y & 1);
        tprintf("fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
        if (bottom
                ? !curr_mb_frame_flag // bottom macroblock
                : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
                ) {
            top_xy -= s->mb_stride;
        }
        if (bottom
                ? !curr_mb_frame_flag // bottom macroblock
                : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
                ) {
            topleft_xy -= s->mb_stride;
        }
        if (bottom
                ? !curr_mb_frame_flag // bottom macroblock
                : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
                ) {
            topright_xy -= s->mb_stride;
        }
        if (left_mb_frame_flag != curr_mb_frame_flag) {
            left_xy[1] = left_xy[0] = pair_xy - 1;
            if (curr_mb_frame_flag) {
                if (bottom) {
                    left_block[0]= 2;
                    left_block[1]= 2;
                    left_block[2]= 3;
                    left_block[3]= 3;
                    left_block[4]= 8;
                    left_block[5]= 11;
                    left_block[6]= 8;
                    left_block[7]= 11;
                } else {
                    left_block[0]= 0;
                    left_block[1]= 0;
                    left_block[2]= 1;
                    left_block[3]= 1;
                    left_block[4]= 7;
                    left_block[5]= 10;
                    left_block[6]= 7;
                    left_block[7]= 10;
                }
            } else {
                left_xy[1] += s->mb_stride;
                //left_block[0]= 0;
                left_block[1]= 2;
                left_block[2]= 0;
                left_block[3]= 2;
                //left_block[4]= 7;
                left_block[5]= 10;
                left_block[6]= 7;
                left_block[7]= 10;
            }
        }
Michael Niedermayer's avatar
Michael Niedermayer committed
531 532
    }

533 534 535
    h->top_mb_xy = top_xy;
    h->left_mb_xy[0] = left_xy[0];
    h->left_mb_xy[1] = left_xy[1];
Michael Niedermayer's avatar
Michael Niedermayer committed
536 537 538 539 540 541 542 543 544 545 546 547 548
    if(for_deblock){
        topleft_type = h->slice_table[topleft_xy ] < 255 ? s->current_picture.mb_type[topleft_xy] : 0;
        top_type     = h->slice_table[top_xy     ] < 255 ? s->current_picture.mb_type[top_xy]     : 0;
        topright_type= h->slice_table[topright_xy] < 255 ? s->current_picture.mb_type[topright_xy]: 0;
        left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
        left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
    }else{
        topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
        top_type     = h->slice_table[top_xy     ] == h->slice_num ? s->current_picture.mb_type[top_xy]     : 0;
        topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
        left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
        left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
    }
Michael Niedermayer's avatar
Michael Niedermayer committed
549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581

    if(IS_INTRA(mb_type)){
        h->topleft_samples_available= 
        h->top_samples_available= 
        h->left_samples_available= 0xFFFF;
        h->topright_samples_available= 0xEEEA;

        if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
            h->topleft_samples_available= 0xB3FF;
            h->top_samples_available= 0x33FF;
            h->topright_samples_available= 0x26EA;
        }
        for(i=0; i<2; i++){
            if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
                h->topleft_samples_available&= 0xDF5F;
                h->left_samples_available&= 0x5F5F;
            }
        }
        
        if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
            h->topleft_samples_available&= 0x7FFF;
        
        if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
            h->topright_samples_available&= 0xFBFF;
    
        if(IS_INTRA4x4(mb_type)){
            if(IS_INTRA4x4(top_type)){
                h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
                h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
                h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
                h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
            }else{
                int pred;
582
                if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
Michael Niedermayer's avatar
Michael Niedermayer committed
583
                    pred= -1;
584 585
                else{
                    pred= 2;
Michael Niedermayer's avatar
Michael Niedermayer committed
586 587 588 589 590 591 592 593 594 595 596 597
                }
                h->intra4x4_pred_mode_cache[4+8*0]=
                h->intra4x4_pred_mode_cache[5+8*0]=
                h->intra4x4_pred_mode_cache[6+8*0]=
                h->intra4x4_pred_mode_cache[7+8*0]= pred;
            }
            for(i=0; i<2; i++){
                if(IS_INTRA4x4(left_type[i])){
                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
                }else{
                    int pred;
598
                    if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
Michael Niedermayer's avatar
Michael Niedermayer committed
599
                        pred= -1;
600 601
                    else{
                        pred= 2;
Michael Niedermayer's avatar
Michael Niedermayer committed
602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620
                    }
                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
                }
            }
        }
    }
    
    
/*
0 . T T. T T T T 
1 L . .L . . . . 
2 L . .L . . . . 
3 . T TL . . . . 
4 L . .L . . . . 
5 L . .. . . . . 
*/
//FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
    if(top_type){
621 622 623
        h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
        h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
        h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
624
        h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
Michael Niedermayer's avatar
Michael Niedermayer committed
625
    
626
        h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
627
        h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
Michael Niedermayer's avatar
Michael Niedermayer committed
628
    
629
        h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
630
        h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
Michael Niedermayer's avatar
Michael Niedermayer committed
631
        
Michael Niedermayer's avatar
Michael Niedermayer committed
632 633 634 635 636 637 638 639 640 641
    }else{
        h->non_zero_count_cache[4+8*0]=      
        h->non_zero_count_cache[5+8*0]=
        h->non_zero_count_cache[6+8*0]=
        h->non_zero_count_cache[7+8*0]=
    
        h->non_zero_count_cache[1+8*0]=
        h->non_zero_count_cache[2+8*0]=
    
        h->non_zero_count_cache[1+8*3]=
Michael Niedermayer's avatar
Michael Niedermayer committed
642 643
        h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
        
Michael Niedermayer's avatar
Michael Niedermayer committed
644
    }
645

646 647 648 649 650 651 652 653 654 655 656
    for (i=0; i<2; i++) {
        if(left_type[i]){
            h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
            h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
            h->non_zero_count_cache[0+8*1 +   8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
            h->non_zero_count_cache[0+8*4 +   8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
        }else{
            h->non_zero_count_cache[3+8*1 + 2*8*i]= 
            h->non_zero_count_cache[3+8*2 + 2*8*i]= 
            h->non_zero_count_cache[0+8*1 +   8*i]= 
            h->non_zero_count_cache[0+8*4 +   8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681
        }
    }

    if( h->pps.cabac ) {
        // top_cbp
        if(top_type) {
            h->top_cbp = h->cbp_table[top_xy];
        } else if(IS_INTRA(mb_type)) {
            h->top_cbp = 0x1C0;
        } else {
            h->top_cbp = 0;
        }
        // left_cbp
        if (left_type[0]) {
            h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
        } else if(IS_INTRA(mb_type)) {
            h->left_cbp = 0x1C0;
        } else {
            h->left_cbp = 0;
        }
        if (left_type[0]) {
            h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
        }
        if (left_type[1]) {
            h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
682
        }
Michael Niedermayer's avatar
Michael Niedermayer committed
683
    }
684

Michael Niedermayer's avatar
Michael Niedermayer committed
685
#if 1
686
    //FIXME direct mb can skip much of this
687
    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
Michael Niedermayer's avatar
Michael Niedermayer committed
688
        int list;
689 690
        for(list=0; list<1+(h->slice_type==B_TYPE); list++){
            if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
Michael Niedermayer's avatar
Michael Niedermayer committed
691 692 693 694 695
                /*if(!h->mv_cache_clean[list]){
                    memset(h->mv_cache [list],  0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
                    memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
                    h->mv_cache_clean[list]= 1;
                }*/
696
                continue;
Michael Niedermayer's avatar
Michael Niedermayer committed
697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745
            }
            h->mv_cache_clean[list]= 0;
            
            if(IS_INTER(top_type)){
                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
                h->ref_cache[list][scan8[0] + 0 - 1*8]=
                h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
                h->ref_cache[list][scan8[0] + 2 - 1*8]=
                h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
            }else{
                *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]= 
                *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]= 
                *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]= 
                *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
                *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
            }

            //FIXME unify cleanup or sth
            if(IS_INTER(left_type[0])){
                const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
                const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]];
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]];
                h->ref_cache[list][scan8[0] - 1 + 0*8]= 
                h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)];
            }else{
                *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 1*8]= 0;
                h->ref_cache[list][scan8[0] - 1 + 0*8]=
                h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
            }
            
            if(IS_INTER(left_type[1])){
                const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
                const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1;
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]];
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]];
                h->ref_cache[list][scan8[0] - 1 + 2*8]= 
                h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)];
            }else{
                *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 2*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0;
                h->ref_cache[list][scan8[0] - 1 + 2*8]=
                h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
Michael Niedermayer's avatar
Michael Niedermayer committed
746
                assert((!left_type[0]) == (!left_type[1]));
Michael Niedermayer's avatar
Michael Niedermayer committed
747 748
            }

749
            if(for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred))
Michael Niedermayer's avatar
Michael Niedermayer committed
750 751
                continue;

752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772
            if(IS_INTER(topleft_type)){
                const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
            }else{
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
                h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
            }
            
            if(IS_INTER(topright_type)){
                const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
            }else{
                *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
                h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
            }
            

Michael Niedermayer's avatar
Michael Niedermayer committed
773 774
            h->ref_cache[list][scan8[5 ]+1] = 
            h->ref_cache[list][scan8[7 ]+1] = 
775
            h->ref_cache[list][scan8[13]+1] =  //FIXME remove past 3 (init somewhere else)
Michael Niedermayer's avatar
Michael Niedermayer committed
776 777 778 779
            h->ref_cache[list][scan8[4 ]] = 
            h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
            *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
780
            *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
Michael Niedermayer's avatar
Michael Niedermayer committed
781 782
            *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
            *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822

            if( h->pps.cabac ) {
                /* XXX beurk, Load mvd */
                if(IS_INTER(topleft_type)){
                    const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy];
                }else{
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= 0;
                }

                if(IS_INTER(top_type)){
                    const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
                }else{
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]= 
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]= 
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]= 
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
                }
                if(IS_INTER(left_type[0])){
                    const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
                }else{
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
                }
                if(IS_INTER(left_type[1])){
                    const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
                }else{
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
                }
                *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
                *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
823
                *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
824 825
                *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
                *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852

                if(h->slice_type == B_TYPE){
                    fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);

                    if(IS_DIRECT(top_type)){
                        *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
                    }else if(IS_8X8(top_type)){
                        int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
                        h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
                        h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
                    }else{
                        *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
                    }
                    
                    //FIXME interlacing
                    if(IS_DIRECT(left_type[0])){
                        h->direct_cache[scan8[0] - 1 + 0*8]=
                        h->direct_cache[scan8[0] - 1 + 2*8]= 1;
                    }else if(IS_8X8(left_type[0])){
                        int b8_xy = h->mb2b8_xy[left_xy[0]] + 1;
                        h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[b8_xy];
                        h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[b8_xy + h->b8_stride];
                    }else{
                        h->direct_cache[scan8[0] - 1 + 0*8]=
                        h->direct_cache[scan8[0] - 1 + 2*8]= 0;
                    }
                }
853
            }
Michael Niedermayer's avatar
Michael Niedermayer committed
854 855 856
        }
    }
#endif
857 858

    h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
Michael Niedermayer's avatar
Michael Niedermayer committed
859 860 861 862
}

static inline void write_back_intra_pred_mode(H264Context *h){
    MpegEncContext * const s = &h->s;
863
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
Michael Niedermayer's avatar
Michael Niedermayer committed
864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886

    h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
    h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
    h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
    h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
    h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
    h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
    h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
}

/**
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
 */
static inline int check_intra4x4_pred_mode(H264Context *h){
    MpegEncContext * const s = &h->s;
    static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
    static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
    int i;
    
    if(!(h->top_samples_available&0x8000)){
        for(i=0; i<4; i++){
            int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
            if(status<0){
887
                av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
Michael Niedermayer's avatar
Michael Niedermayer committed
888 889 890 891 892 893 894 895 896 897 898
                return -1;
            } else if(status){
                h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
            }
        }
    }
    
    if(!(h->left_samples_available&0x8000)){
        for(i=0; i<4; i++){
            int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
            if(status<0){
899
                av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
Michael Niedermayer's avatar
Michael Niedermayer committed
900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917
                return -1;
            } else if(status){
                h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
            }
        }
    }

    return 0;
} //FIXME cleanup like next

/**
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
 */
static inline int check_intra_pred_mode(H264Context *h, int mode){
    MpegEncContext * const s = &h->s;
    static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
    static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
    
Loic Le Loarer's avatar
Loic Le Loarer committed
918 919
    if(mode < 0 || mode > 6) {
        av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
Michael Niedermayer's avatar
Michael Niedermayer committed
920
        return -1;
Loic Le Loarer's avatar
Loic Le Loarer committed
921
    }
Michael Niedermayer's avatar
Michael Niedermayer committed
922
    
Michael Niedermayer's avatar
Michael Niedermayer committed
923 924 925
    if(!(h->top_samples_available&0x8000)){
        mode= top[ mode ];
        if(mode<0){
926
            av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
Michael Niedermayer's avatar
Michael Niedermayer committed
927 928 929 930 931 932 933
            return -1;
        }
    }
    
    if(!(h->left_samples_available&0x8000)){
        mode= left[ mode ];
        if(mode<0){
934
            av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
Michael Niedermayer's avatar
Michael Niedermayer committed
935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950
            return -1;
        } 
    }

    return mode;
}

/**
 * gets the predicted intra4x4 prediction mode.
 */
static inline int pred_intra_mode(H264Context *h, int n){
    const int index8= scan8[n];
    const int left= h->intra4x4_pred_mode_cache[index8 - 1];
    const int top = h->intra4x4_pred_mode_cache[index8 - 8];
    const int min= FFMIN(left, top);

951
    tprintf("mode:%d %d min:%d\n", left ,top, min);
Michael Niedermayer's avatar
Michael Niedermayer committed
952 953 954 955 956 957 958

    if(min<0) return DC_PRED;
    else      return min;
}

static inline void write_back_non_zero_count(H264Context *h){
    MpegEncContext * const s = &h->s;
959
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
Michael Niedermayer's avatar
Michael Niedermayer committed
960

961 962 963
    h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
    h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
    h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
964
    h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
965 966 967
    h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
    h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
    h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
968
    
969
    h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
970
    h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
971
    h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
972

973
    h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
974
    h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
975
    h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
Michael Niedermayer's avatar
Michael Niedermayer committed
976 977 978 979 980 981 982 983 984 985 986 987 988 989
}

/**
 * gets the predicted number of non zero coefficients.
 * @param n block index
 */
static inline int pred_non_zero_count(H264Context *h, int n){
    const int index8= scan8[n];
    const int left= h->non_zero_count_cache[index8 - 1];
    const int top = h->non_zero_count_cache[index8 - 8];
    int i= left + top;
    
    if(i<64) i= (i+1)>>1;

990
    tprintf("pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
Michael Niedermayer's avatar
Michael Niedermayer committed
991 992 993 994

    return i&31;
}

995 996 997 998 999 1000 1001
static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
    const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];

    if(topright_ref != PART_NOT_AVAILABLE){
        *C= h->mv_cache[list][ i - 8 + part_width ];
        return topright_ref;
    }else{
1002 1003
        tprintf("topright MV not available\n");

1004 1005 1006 1007 1008
        *C= h->mv_cache[list][ i - 8 - 1 ];
        return h->ref_cache[list][ i - 8 - 1 ];
    }
}

Michael Niedermayer's avatar
Michael Niedermayer committed
1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021
/**
 * gets the predicted MV.
 * @param n the block index
 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
    const int index8= scan8[n];
    const int top_ref=      h->ref_cache[list][ index8 - 8 ];
    const int left_ref=     h->ref_cache[list][ index8 - 1 ];
    const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
    const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
1022 1023 1024
    const int16_t * C;
    int diagonal_ref, match_count;

Michael Niedermayer's avatar
Michael Niedermayer committed
1025
    assert(part_width==1 || part_width==2 || part_width==4);
1026

Michael Niedermayer's avatar
Michael Niedermayer committed
1027 1028 1029 1030 1031 1032 1033
/* mv_cache
  B . . A T T T T 
  U . . L . . , .
  U . . L . . . .
  U . . L . . , .
  . . . L . . . .
*/
1034 1035 1036

    diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
    match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
1037
    tprintf("pred_motion match_count=%d\n", match_count);
1038 1039 1040 1041 1042 1043 1044 1045 1046 1047
    if(match_count > 1){ //most common
        *mx= mid_pred(A[0], B[0], C[0]);
        *my= mid_pred(A[1], B[1], C[1]);
    }else if(match_count==1){
        if(left_ref==ref){
            *mx= A[0];
            *my= A[1];        
        }else if(top_ref==ref){
            *mx= B[0];
            *my= B[1];        
Michael Niedermayer's avatar
Michael Niedermayer committed
1048
        }else{
1049 1050
            *mx= C[0];
            *my= C[1];        
Michael Niedermayer's avatar
Michael Niedermayer committed
1051 1052
        }
    }else{
1053
        if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
Michael Niedermayer's avatar
Michael Niedermayer committed
1054
            *mx= A[0];
1055
            *my= A[1];        
Michael Niedermayer's avatar
Michael Niedermayer committed
1056
        }else{
1057 1058
            *mx= mid_pred(A[0], B[0], C[0]);
            *my= mid_pred(A[1], B[1], C[1]);
Michael Niedermayer's avatar
Michael Niedermayer committed
1059 1060
        }
    }
1061
        
1062
    tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1],                    diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
Michael Niedermayer's avatar
Michael Niedermayer committed
1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075
}

/**
 * gets the directionally predicted 16x8 MV.
 * @param n the block index
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
    if(n==0){
        const int top_ref=      h->ref_cache[list][ scan8[0] - 8 ];
        const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];

1076
        tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
Michael Niedermayer's avatar
Michael Niedermayer committed
1077 1078 1079 1080 1081 1082 1083 1084 1085 1086
        
        if(top_ref == ref){
            *mx= B[0];
            *my= B[1];
            return;
        }
    }else{
        const int left_ref=     h->ref_cache[list][ scan8[8] - 1 ];
        const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
        
1087
        tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
Michael Niedermayer's avatar
Michael Niedermayer committed
1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110

        if(left_ref == ref){
            *mx= A[0];
            *my= A[1];
            return;
        }
    }

    //RARE
    pred_motion(h, n, 4, list, ref, mx, my);
}

/**
 * gets the directionally predicted 8x16 MV.
 * @param n the block index
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
    if(n==0){
        const int left_ref=      h->ref_cache[list][ scan8[0] - 1 ];
        const int16_t * const A=  h->mv_cache[list][ scan8[0] - 1 ];
        
1111
        tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
Michael Niedermayer's avatar
Michael Niedermayer committed
1112 1113 1114 1115 1116 1117 1118

        if(left_ref == ref){
            *mx= A[0];
            *my= A[1];
            return;
        }
    }else{
1119 1120 1121 1122
        const int16_t * C;
        int diagonal_ref;

        diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
Michael Niedermayer's avatar
Michael Niedermayer committed
1123
        
1124
        tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
Michael Niedermayer's avatar
Michael Niedermayer committed
1125