diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h index 1926f20bdf7a4c5829439039e688b442ab2ede61..1cba5d35ac5eb02ce6c36ca87dbbe4935a932515 100644 --- a/vp8/common/blockd.h +++ b/vp8/common/blockd.h @@ -148,6 +148,7 @@ typedef enum { #define VP8_YMODES (B_PRED + 1) #define VP8_UV_MODES (TM_PRED + 1) #define VP8_I8X8_MODES (TM_PRED + 1) +#define VP8_I32X32_MODES (TM_PRED + 1) #define VP8_MVREFS (1 + SPLITMV - NEARESTMV) @@ -293,6 +294,11 @@ typedef struct { INTERPOLATIONFILTERTYPE interp_filter; #endif +#if CONFIG_SUPERBLOCKS + // FIXME need a SB array of 4 MB_MODE_INFOs that + // only needs one encoded_as_sb. + unsigned char encoded_as_sb; +#endif } MB_MODE_INFO; typedef struct { diff --git a/vp8/common/entropymode.c b/vp8/common/entropymode.c index 8d43ce8273a8a200b550898d15fc24a46467e47e..5627aa43a4a073062efb96511ae33c25ab9f7c48 100644 --- a/vp8/common/entropymode.c +++ b/vp8/common/entropymode.c @@ -227,6 +227,14 @@ const vp8_tree_index vp8_mv_ref_tree[8] = { -NEWMV, -SPLITMV }; +#if CONFIG_SUPERBLOCKS +const vp8_tree_index vp8_sb_mv_ref_tree[6] = { + -ZEROMV, 2, + -NEARESTMV, 4, + -NEARMV, -NEWMV +}; +#endif + const vp8_tree_index vp8_sub_mv_ref_tree[6] = { -LEFT4X4, 2, -ABOVE4X4, 4, @@ -236,12 +244,18 @@ const vp8_tree_index vp8_sub_mv_ref_tree[6] = { struct vp8_token_struct vp8_bmode_encodings [VP8_BINTRAMODES]; struct vp8_token_struct vp8_ymode_encodings [VP8_YMODES]; +#if CONFIG_SUPERBLOCKS +struct vp8_token_struct vp8_sb_kf_ymode_encodings [VP8_I32X32_MODES]; +#endif struct vp8_token_struct vp8_kf_ymode_encodings [VP8_YMODES]; struct vp8_token_struct vp8_uv_mode_encodings [VP8_UV_MODES]; -struct vp8_token_struct vp8_i8x8_mode_encodings [VP8_UV_MODES]; +struct vp8_token_struct vp8_i8x8_mode_encodings [VP8_I8X8_MODES]; struct vp8_token_struct vp8_mbsplit_encodings [VP8_NUMMBSPLITS]; struct vp8_token_struct vp8_mv_ref_encoding_array [VP8_MVREFS]; +#if CONFIG_SUPERBLOCKS +struct vp8_token_struct vp8_sb_mv_ref_encoding_array [VP8_MVREFS]; +#endif struct vp8_token_struct vp8_sub_mv_ref_encoding_array [VP8_SUBMVREFS]; @@ -253,11 +267,18 @@ void vp8_init_mbmode_probs(VP8_COMMON *x) { vp8_ymode_tree, x->fc.ymode_prob, bct, y_mode_cts, 256, 1); { int i; - for (i = 0; i < 8; i++) + for (i = 0; i < 8; i++) { vp8_tree_probs_from_distribution( VP8_YMODES, vp8_kf_ymode_encodings, vp8_kf_ymode_tree, x->kf_ymode_prob[i], bct, kf_y_mode_cts[i], 256, 1); +#if CONFIG_SUPERBLOCKS + vp8_tree_probs_from_distribution( + VP8_I32X32_MODES, vp8_sb_kf_ymode_encodings, vp8_sb_ymode_tree, + x->sb_kf_ymode_prob[i], bct, kf_y_mode_cts[i], + 256, 1); +#endif + } } { int i; @@ -360,6 +381,9 @@ void vp8_entropy_mode_init() { vp8_tokens_from_tree(vp8_bmode_encodings, vp8_bmode_tree); vp8_tokens_from_tree(vp8_ymode_encodings, vp8_ymode_tree); vp8_tokens_from_tree(vp8_kf_ymode_encodings, vp8_kf_ymode_tree); +#if CONFIG_SUPERBLOCKS + vp8_tokens_from_tree(vp8_sb_kf_ymode_encodings, vp8_sb_ymode_tree); +#endif vp8_tokens_from_tree(vp8_uv_mode_encodings, vp8_uv_mode_tree); vp8_tokens_from_tree(vp8_i8x8_mode_encodings, vp8_i8x8_mode_tree); vp8_tokens_from_tree(vp8_mbsplit_encodings, vp8_mbsplit_tree); @@ -370,6 +394,10 @@ void vp8_entropy_mode_init() { vp8_tokens_from_tree_offset(vp8_mv_ref_encoding_array, vp8_mv_ref_tree, NEARESTMV); +#if CONFIG_SUPERBLOCKS + vp8_tokens_from_tree_offset(vp8_sb_mv_ref_encoding_array, + vp8_sb_mv_ref_tree, NEARESTMV); +#endif vp8_tokens_from_tree_offset(vp8_sub_mv_ref_encoding_array, vp8_sub_mv_ref_tree, LEFT4X4); } diff --git a/vp8/common/entropymode.h b/vp8/common/entropymode.h index 
f9cc263b9e43a8896733797aa0cbba54d9a01a16..430c949a6263186e562f04ec1fb21d5075148608 100644 --- a/vp8/common/entropymode.h +++ b/vp8/common/entropymode.h @@ -40,21 +40,25 @@ extern const vp8_tree_index vp8_bmode_tree[]; extern const vp8_tree_index vp8_ymode_tree[]; extern const vp8_tree_index vp8_kf_ymode_tree[]; extern const vp8_tree_index vp8_uv_mode_tree[]; +#define vp8_sb_ymode_tree vp8_uv_mode_tree extern const vp8_tree_index vp8_i8x8_mode_tree[]; extern const vp8_tree_index vp8_mbsplit_tree[]; extern const vp8_tree_index vp8_mv_ref_tree[]; +extern const vp8_tree_index vp8_sb_mv_ref_tree[]; extern const vp8_tree_index vp8_sub_mv_ref_tree[]; extern struct vp8_token_struct vp8_bmode_encodings [VP8_BINTRAMODES]; extern struct vp8_token_struct vp8_ymode_encodings [VP8_YMODES]; +extern struct vp8_token_struct vp8_sb_kf_ymode_encodings [VP8_I32X32_MODES]; extern struct vp8_token_struct vp8_kf_ymode_encodings [VP8_YMODES]; -extern struct vp8_token_struct vp8_i8x8_mode_encodings [VP8_UV_MODES]; +extern struct vp8_token_struct vp8_i8x8_mode_encodings [VP8_I8X8_MODES]; extern struct vp8_token_struct vp8_uv_mode_encodings [VP8_UV_MODES]; extern struct vp8_token_struct vp8_mbsplit_encodings [VP8_NUMMBSPLITS]; /* Inter mode values do not start at zero */ extern struct vp8_token_struct vp8_mv_ref_encoding_array [VP8_MVREFS]; +extern struct vp8_token_struct vp8_sb_mv_ref_encoding_array [VP8_MVREFS]; extern struct vp8_token_struct vp8_sub_mv_ref_encoding_array [VP8_SUBMVREFS]; void vp8_entropy_mode_init(void); diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c index b71ef750df084e943cbfaa051d548759b3d6b2d2..d28024cda565c7adb744463817a9735375598ed7 100644 --- a/vp8/common/generic/systemdependent.c +++ b/vp8/common/generic/systemdependent.c @@ -47,6 +47,12 @@ void vp8_machine_specific_config(VP8_COMMON *ctx) { rtcd->recon.recon4 = vp8_recon4b_c; rtcd->recon.recon_mb = vp8_recon_mb_c; rtcd->recon.recon_mby = vp8_recon_mby_c; +#if CONFIG_SUPERBLOCKS + rtcd->recon.build_intra_predictors_sby_s = + vp8_build_intra_predictors_sby_s; + rtcd->recon.build_intra_predictors_sbuv_s = + vp8_build_intra_predictors_sbuv_s; +#endif rtcd->recon.build_intra_predictors_mby = vp8_build_intra_predictors_mby; #if CONFIG_COMP_INTRA_PRED diff --git a/vp8/common/loopfilter.c b/vp8/common/loopfilter.c index 05c00ef4e1ee79bede20d7853084f9e1524f0531..d9c4b54be9b720777542412a0c0fa6623d7a346b 100644 --- a/vp8/common/loopfilter.c +++ b/vp8/common/loopfilter.c @@ -325,7 +325,13 @@ void vp8_loop_filter_frame lfi.lim = lfi_n->lim[filter_level]; lfi.hev_thr = lfi_n->hev_thr[hev_index]; - if (mb_col > 0) + if (mb_col > 0 +#if CONFIG_SUPERBLOCKS + && !((mb_col & 1) && mode_info_context->mbmi.encoded_as_sb && + mode_info_context[0].mbmi.mb_skip_coeff && + mode_info_context[-1].mbmi.mb_skip_coeff) +#endif + ) vp8_loop_filter_mbv_c (y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi); @@ -344,7 +350,13 @@ void vp8_loop_filter_frame } /* don't apply across umv border */ - if (mb_row > 0) + if (mb_row > 0 +#if CONFIG_SUPERBLOCKS + && !((mb_row & 1) && mode_info_context->mbmi.encoded_as_sb && + mode_info_context[0].mbmi.mb_skip_coeff && + mode_info_context[-cm->mode_info_stride].mbmi.mb_skip_coeff) +#endif + ) vp8_loop_filter_mbh_c (y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi); @@ -362,7 +374,13 @@ void vp8_loop_filter_frame } } else { // FIXME: Not 8x8 aware - if (mb_col > 0) + if (mb_col > 0 +#if CONFIG_SUPERBLOCKS + && !((mb_col & 1) && mode_info_context->mbmi.encoded_as_sb && + 
mode_info_context[0].mbmi.mb_skip_coeff && + mode_info_context[-1].mbmi.mb_skip_coeff) +#endif + ) LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_v) (y_ptr, post->y_stride, lfi_n->mblim[filter_level]); @@ -371,7 +389,13 @@ void vp8_loop_filter_frame (y_ptr, post->y_stride, lfi_n->blim[filter_level]); /* don't apply across umv border */ - if (mb_row > 0) + if (mb_row > 0 +#if CONFIG_SUPERBLOCKS + && !((mb_row & 1) && mode_info_context->mbmi.encoded_as_sb && + mode_info_context[0].mbmi.mb_skip_coeff && + mode_info_context[-cm->mode_info_stride].mbmi.mb_skip_coeff) +#endif + ) LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_h) (y_ptr, post->y_stride, lfi_n->mblim[filter_level]); diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h index a36347dca8e5f8b599197c2301e844d61817edf0..b7a543220a0f2b98c18f801c01c0160aa6f193da 100644 --- a/vp8/common/onyxc_int.h +++ b/vp8/common/onyxc_int.h @@ -226,12 +226,15 @@ typedef struct VP8Common { /* Y,U,V,Y2 */ ENTROPY_CONTEXT_PLANES *above_context; /* row of context for each plane */ - ENTROPY_CONTEXT_PLANES left_context; /* (up to) 4 contexts "" */ + ENTROPY_CONTEXT_PLANES left_context[2]; /* (up to) 4 contexts "" */ /* keyframe block modes are predicted by their above, left neighbors */ vp8_prob kf_bmode_prob [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES - 1]; vp8_prob kf_ymode_prob[8][VP8_YMODES - 1]; /* keyframe "" */ +#if CONFIG_SUPERBLOCKS + vp8_prob sb_kf_ymode_prob[8][VP8_I32X32_MODES - 1]; +#endif int kf_ymode_probs_index; int kf_ymode_probs_update; vp8_prob kf_uv_mode_prob[VP8_YMODES] [VP8_UV_MODES - 1]; @@ -239,6 +242,9 @@ typedef struct VP8Common { vp8_prob prob_intra_coded; vp8_prob prob_last_coded; vp8_prob prob_gf_coded; +#if CONFIG_SUPERBLOCKS + vp8_prob sb_coded; +#endif // Context probabilities when using predictive coding of segment id vp8_prob segment_pred_probs[PREDICTION_PROBS]; diff --git a/vp8/common/pred_common.c b/vp8/common/pred_common.c index ac5d8600980b1c9628d67d930be22ae5ae170881..cb80a0f7e92e438d823456989181c268abfb2e1e 100644 --- a/vp8/common/pred_common.c +++ b/vp8/common/pred_common.c @@ -1,3 +1,4 @@ + /* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. 
* @@ -224,10 +225,24 @@ void set_pred_flag(MACROBLOCKD *const xd, switch (pred_id) { case PRED_SEG_ID: xd->mode_info_context->mbmi.seg_id_predicted = pred_flag; +#if CONFIG_SUPERBLOCKS + if (xd->mode_info_context->mbmi.encoded_as_sb) { + xd->mode_info_context[1].mbmi.seg_id_predicted = pred_flag; + xd->mode_info_context[xd->mode_info_stride].mbmi.seg_id_predicted = pred_flag; + xd->mode_info_context[xd->mode_info_stride+1].mbmi.seg_id_predicted = pred_flag; + } +#endif break; case PRED_REF: xd->mode_info_context->mbmi.ref_predicted = pred_flag; +#if CONFIG_SUPERBLOCKS + if (xd->mode_info_context->mbmi.encoded_as_sb) { + xd->mode_info_context[1].mbmi.ref_predicted = pred_flag; + xd->mode_info_context[xd->mode_info_stride].mbmi.ref_predicted = pred_flag; + xd->mode_info_context[xd->mode_info_stride+1].mbmi.ref_predicted = pred_flag; + } +#endif break; case PRED_MBSKIP: diff --git a/vp8/common/recon.c b/vp8/common/recon.c index 8fc320863b85e2d375a63e699558aa11c1a97c0c..cf2d2fb85109940af7a42419adf05272d2386e76 100644 --- a/vp8/common/recon.c +++ b/vp8/common/recon.c @@ -124,6 +124,52 @@ void vp8_recon2b_c } } +#if CONFIG_SUPERBLOCKS +void vp8_recon_mby_s_c(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *xd, uint8_t *dst) { + int x, y; + BLOCKD *b = &xd->block[0]; + int stride = b->dst_stride; + short *diff = b->diff; + + for (y = 0; y < 16; y++) { + for (x = 0; x < 16; x++) { + int a = dst[x] + diff[x]; + if (a < 0) + a = 0; + else if (a > 255) + a = 255; + dst[x] = a; + } + dst += stride; + diff += 16; + } +} + +void vp8_recon_mbuv_s_c(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *xd, uint8_t *udst, uint8_t *vdst) { + int x, y, i; + uint8_t *dst = udst; + + for (i = 0; i < 2; i++, dst = vdst) { + BLOCKD *b = &xd->block[16 + 4 * i]; + int stride = b->dst_stride; + short *diff = b->diff; + + for (y = 0; y < 8; y++) { + for (x = 0; x < 8; x++) { + int a = dst[x] + diff[x]; + if (a < 0) + a = 0; + else if (a > 255) + a = 255; + dst[x] = a; + } + dst += stride; + diff += 8; + } + } +} +#endif + void vp8_recon_mby_c(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *xd) { #if ARCH_ARM BLOCKD *b = &xd->block[0]; diff --git a/vp8/common/recon.h b/vp8/common/recon.h index 2626a218dddefb52966b4b1af7d015edc89ee8c9..3527fc14d872ab8df39bae31983509b330aa3bfa 100644 --- a/vp8/common/recon.h +++ b/vp8/common/recon.h @@ -100,6 +100,11 @@ extern prototype_recon_macroblock(vp8_recon_recon_mb); #endif extern prototype_recon_macroblock(vp8_recon_recon_mby); +#ifndef vp8_recon_build_intra_predictors_sby_s +#define vp8_recon_build_intra_predictors_sby_s vp8_build_intra_predictors_sby_s +#endif +extern prototype_build_intra_predictors(vp8_recon_build_intra_predictors_sby_s); + #ifndef vp8_recon_build_intra_predictors_mby #define vp8_recon_build_intra_predictors_mby vp8_build_intra_predictors_mby #endif @@ -126,6 +131,11 @@ extern prototype_build_intra_predictors\ extern prototype_build_intra_predictors\ (vp8_recon_build_intra_predictors_mby_s); +#ifndef vp8_recon_build_intra_predictors_sbuv_s +#define vp8_recon_build_intra_predictors_sbuv_s vp8_build_intra_predictors_sbuv_s +#endif +extern prototype_build_intra_predictors(vp8_recon_build_intra_predictors_sbuv_s); + #ifndef vp8_recon_build_intra_predictors_mbuv #define vp8_recon_build_intra_predictors_mbuv vp8_build_intra_predictors_mbuv #endif @@ -214,10 +224,16 @@ typedef struct vp8_recon_rtcd_vtable { vp8_recon_fn_t recon4; vp8_recon_mb_fn_t recon_mb; vp8_recon_mb_fn_t recon_mby; +#if CONFIG_SUPERBLOCKS + vp8_build_intra_pred_fn_t 
build_intra_predictors_sby_s; +#endif vp8_build_intra_pred_fn_t build_intra_predictors_mby_s; vp8_build_intra_pred_fn_t build_intra_predictors_mby; #if CONFIG_COMP_INTRA_PRED vp8_build_intra_pred_fn_t build_comp_intra_predictors_mby; +#endif +#if CONFIG_SUPERBLOCKS + vp8_build_intra_pred_fn_t build_intra_predictors_sbuv_s; #endif vp8_build_intra_pred_fn_t build_intra_predictors_mbuv_s; vp8_build_intra_pred_fn_t build_intra_predictors_mbuv; diff --git a/vp8/common/reconinter.c b/vp8/common/reconinter.c index 0d82db78475b566cb7cf54c3185cf0a3c653e0e6..1b5ef837fb8549ada3d4f1b72e580e57b0dcacf8 100644 --- a/vp8/common/reconinter.c +++ b/vp8/common/reconinter.c @@ -759,6 +759,56 @@ void vp8_build_1st_inter16x16_predictors_mb(MACROBLOCKD *xd, vp8_build_1st_inter16x16_predictors_mbuv(xd, dst_u, dst_v, dst_uvstride); } +#if CONFIG_SUPERBLOCKS +void vp8_build_inter32x32_predictors_sb(MACROBLOCKD *x, + unsigned char *dst_y, + unsigned char *dst_u, + unsigned char *dst_v, + int dst_ystride, + int dst_uvstride) { + uint8_t *y1 = x->pre.y_buffer, *u1 = x->pre.u_buffer, *v1 = x->pre.v_buffer; + uint8_t *y2 = x->second_pre.y_buffer, *u2 = x->second_pre.u_buffer, + *v2 = x->second_pre.v_buffer; + int n; + + for (n = 0; n < 4; n++) + { + const int x_idx = n & 1, y_idx = n >> 1; + + x->pre.y_buffer = y1 + y_idx * 16 * x->pre.y_stride + x_idx * 16; + x->pre.u_buffer = u1 + y_idx * 8 * x->pre.uv_stride + x_idx * 8; + x->pre.v_buffer = v1 + y_idx * 8 * x->pre.uv_stride + x_idx * 8; + + vp8_build_1st_inter16x16_predictors_mb(x, + dst_y + y_idx * 16 * dst_ystride + x_idx * 16, + dst_u + y_idx * 8 * dst_uvstride + x_idx * 8, + dst_v + y_idx * 8 * dst_uvstride + x_idx * 8, + dst_ystride, dst_uvstride); + if (x->mode_info_context->mbmi.second_ref_frame) { + x->second_pre.y_buffer = y2 + y_idx * 16 * x->pre.y_stride + x_idx * 16; + x->second_pre.u_buffer = u2 + y_idx * 8 * x->pre.uv_stride + x_idx * 8; + x->second_pre.v_buffer = v2 + y_idx * 8 * x->pre.uv_stride + x_idx * 8; + + vp8_build_2nd_inter16x16_predictors_mb(x, + dst_y + y_idx * 16 * dst_ystride + x_idx * 16, + dst_u + y_idx * 8 * dst_uvstride + x_idx * 8, + dst_v + y_idx * 8 * dst_uvstride + x_idx * 8, + dst_ystride, dst_uvstride); + } + } + + x->pre.y_buffer = y1; + x->pre.u_buffer = u1; + x->pre.v_buffer = v1; + + if (x->mode_info_context->mbmi.second_ref_frame) { + x->second_pre.y_buffer = y2; + x->second_pre.u_buffer = u2; + x->second_pre.v_buffer = v2; + } +} +#endif + /* * The following functions should be called after an initial * call to vp8_build_inter16x16_predictors_mb() or _mby()/_mbuv(). 
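The 32x32 inter predictor above is simply four 16x16 macroblock predictions: the pre/dst pointers are offset per quadrant as (x_idx, y_idx) = (n & 1, n >> 1), a 16-pel step for luma and an 8-pel step for 4:2:0 chroma, and the saved base pointers are restored at the end. A minimal standalone sketch of that index arithmetic (the stride values and the main() harness are illustrative assumptions, not code from the patch):

#include <stdio.h>

/* Quadrant walk used by vp8_build_inter32x32_predictors_sb(): the n-th
 * 16x16 macroblock of a 32x32 superblock sits at column (n & 1) and
 * row (n >> 1) of the superblock; chroma moves at half the luma step. */
int main(void) {
  const int y_stride = 64, uv_stride = 32;  /* assumed frame strides */
  int n;
  for (n = 0; n < 4; n++) {
    const int x_idx = n & 1, y_idx = n >> 1;
    printf("MB %d: y offset %4d, u/v offset %4d\n", n,
           y_idx * 16 * y_stride + x_idx * 16,
           y_idx * 8 * uv_stride + x_idx * 8);
  }
  return 0;
}

The same offsets are applied to x->pre (and x->second_pre when a second reference frame is present) before each vp8_build_1st/2nd_inter16x16_predictors_mb() call.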
diff --git a/vp8/common/reconintra.c b/vp8/common/reconintra.c index 96bebc5be2ff8365b0addb3e05618466a9344184..d858cd153a586769cab1dd9fd6e3d1a5eb050dce 100644 --- a/vp8/common/reconintra.c +++ b/vp8/common/reconintra.c @@ -207,17 +207,18 @@ void vp8_recon_intra_mbuv(const vp8_recon_rtcd_vtable_t *rtcd, } } -void vp8_build_intra_predictors_mby_internal(MACROBLOCKD *xd, - unsigned char *ypred_ptr, - int y_stride, int mode) { +void vp8_build_intra_predictors_internal(MACROBLOCKD *xd, + unsigned char *src, int src_stride, + unsigned char *ypred_ptr, + int y_stride, int mode, int bsize) { - unsigned char *yabove_row = xd->dst.y_buffer - xd->dst.y_stride; - unsigned char yleft_col[16]; + unsigned char *yabove_row = src - src_stride; + unsigned char yleft_col[32]; unsigned char ytop_left = yabove_row[-1]; int r, c, i; - for (i = 0; i < 16; i++) { - yleft_col[i] = xd->dst.y_buffer [i * xd->dst.y_stride - 1]; + for (i = 0; i < bsize; i++) { + yleft_col[i] = src[i * src_stride - 1]; } /* for Y */ @@ -227,58 +228,58 @@ void vp8_build_intra_predictors_mby_internal(MACROBLOCKD *xd, int i; int shift; int average = 0; - + int log2_bsize_minus_1; + + assert(bsize == 8 || bsize == 16 || bsize == 32); + if (bsize == 8) { + log2_bsize_minus_1 = 2; + } else if (bsize == 16) { + log2_bsize_minus_1 = 3; + } else /* bsize == 32 */ { + log2_bsize_minus_1 = 4; + } if (xd->up_available || xd->left_available) { if (xd->up_available) { - for (i = 0; i < 16; i++) { + for (i = 0; i < bsize; i++) { average += yabove_row[i]; } } if (xd->left_available) { - for (i = 0; i < 16; i++) { + for (i = 0; i < bsize; i++) { average += yleft_col[i]; } } - shift = 3 + xd->up_available + xd->left_available; + shift = log2_bsize_minus_1 + xd->up_available + xd->left_available; expected_dc = (average + (1 << (shift - 1))) >> shift; } else { expected_dc = 128; } - for (r = 0; r < 16; r++) { - vpx_memset(ypred_ptr, expected_dc, 16); - ypred_ptr += y_stride; /*16;*/ + for (r = 0; r < bsize; r++) { + vpx_memset(ypred_ptr, expected_dc, bsize); + ypred_ptr += y_stride; } } break; case V_PRED: { - - for (r = 0; r < 16; r++) { - - ((int *)ypred_ptr)[0] = ((int *)yabove_row)[0]; - ((int *)ypred_ptr)[1] = ((int *)yabove_row)[1]; - ((int *)ypred_ptr)[2] = ((int *)yabove_row)[2]; - ((int *)ypred_ptr)[3] = ((int *)yabove_row)[3]; + for (r = 0; r < bsize; r++) { + memcpy(ypred_ptr, yabove_row, bsize); ypred_ptr += y_stride; } } break; case H_PRED: { - - for (r = 0; r < 16; r++) { - - vpx_memset(ypred_ptr, yleft_col[r], 16); + for (r = 0; r < bsize; r++) { + vpx_memset(ypred_ptr, yleft_col[r], bsize); ypred_ptr += y_stride; } - } break; case TM_PRED: { - - for (r = 0; r < 16; r++) { - for (c = 0; c < 16; c++) { + for (r = 0; r < bsize; r++) { + for (c = 0; c < bsize; c++) { int pred = yleft_col[r] + yabove_row[ c] - ytop_left; if (pred < 0) @@ -292,31 +293,30 @@ void vp8_build_intra_predictors_mby_internal(MACROBLOCKD *xd, ypred_ptr += y_stride; } - } break; case D45_PRED: { - d45_predictor(ypred_ptr, y_stride, 16, yabove_row, yleft_col); + d45_predictor(ypred_ptr, y_stride, bsize, yabove_row, yleft_col); } break; case D135_PRED: { - d135_predictor(ypred_ptr, y_stride, 16, yabove_row, yleft_col); + d135_predictor(ypred_ptr, y_stride, bsize, yabove_row, yleft_col); } break; case D117_PRED: { - d117_predictor(ypred_ptr, y_stride, 16, yabove_row, yleft_col); + d117_predictor(ypred_ptr, y_stride, bsize, yabove_row, yleft_col); } break; case D153_PRED: { - d153_predictor(ypred_ptr, y_stride, 16, yabove_row, yleft_col); + 
d153_predictor(ypred_ptr, y_stride, bsize, yabove_row, yleft_col); } break; case D27_PRED: { - d27_predictor(ypred_ptr, y_stride, 16, yabove_row, yleft_col); + d27_predictor(ypred_ptr, y_stride, bsize, yabove_row, yleft_col); } break; case D63_PRED: { - d63_predictor(ypred_ptr, y_stride, 16, yabove_row, yleft_col); + d63_predictor(ypred_ptr, y_stride, bsize, yabove_row, yleft_col); } break; case I8X8_PRED: @@ -332,25 +332,36 @@ } void vp8_build_intra_predictors_mby(MACROBLOCKD *xd) { - vp8_build_intra_predictors_mby_internal(xd, xd->predictor, 16, - xd->mode_info_context->mbmi.mode); + vp8_build_intra_predictors_internal(xd, xd->dst.y_buffer, xd->dst.y_stride, + xd->predictor, 16, + xd->mode_info_context->mbmi.mode, 16); } void vp8_build_intra_predictors_mby_s(MACROBLOCKD *xd) { - vp8_build_intra_predictors_mby_internal(xd, xd->dst.y_buffer, - xd->dst.y_stride, - xd->mode_info_context->mbmi.mode); + vp8_build_intra_predictors_internal(xd, xd->dst.y_buffer, xd->dst.y_stride, + xd->dst.y_buffer, xd->dst.y_stride, + xd->mode_info_context->mbmi.mode, 16); } +#if CONFIG_SUPERBLOCKS +void vp8_build_intra_predictors_sby_s(MACROBLOCKD *x) { + vp8_build_intra_predictors_internal(x, x->dst.y_buffer, x->dst.y_stride, + x->dst.y_buffer, x->dst.y_stride, + x->mode_info_context->mbmi.mode, 32); +} +#endif + #if CONFIG_COMP_INTRA_PRED void vp8_build_comp_intra_predictors_mby(MACROBLOCKD *xd) { unsigned char predictor[2][256]; int i; - vp8_build_intra_predictors_mby_internal( - xd, predictor[0], 16, xd->mode_info_context->mbmi.mode); - vp8_build_intra_predictors_mby_internal( - xd, predictor[1], 16, xd->mode_info_context->mbmi.second_mode); + vp8_build_intra_predictors_internal(xd, xd->dst.y_buffer, xd->dst.y_stride, + predictor[0], 16, + xd->mode_info_context->mbmi.mode, 16); + vp8_build_intra_predictors_internal(xd, xd->dst.y_buffer, xd->dst.y_stride, + predictor[1], 16, + xd->mode_info_context->mbmi.second_mode, 16); for (i = 0; i < 256; i++) { xd->predictor[i] = (predictor[0][i] + predictor[1][i] + 1) >> 1; @@ -362,172 +373,37 @@ void vp8_build_intra_predictors_mbuv_internal(MACROBLOCKD *xd, unsigned char *upred_ptr, unsigned char *vpred_ptr, int uv_stride, - int mode) { - YV12_BUFFER_CONFIG * dst = &xd->dst; - unsigned char *uabove_row = dst->u_buffer - dst->uv_stride; - unsigned char uleft_col[16]; - unsigned char utop_left = uabove_row[-1]; - unsigned char *vabove_row = dst->v_buffer - dst->uv_stride; - unsigned char vleft_col[20]; - unsigned char vtop_left = vabove_row[-1]; - - int i, j; - - for (i = 0; i < 8; i++) { - uleft_col[i] = dst->u_buffer [i * dst->uv_stride - 1]; - vleft_col[i] = dst->v_buffer [i * dst->uv_stride - 1]; - } - - switch (mode) { - case DC_PRED: { - int expected_udc; - int expected_vdc; - int i; - int shift; - int Uaverage = 0; - int Vaverage = 0; - - if (xd->up_available) { - for (i = 0; i < 8; i++) { - Uaverage += uabove_row[i]; - Vaverage += vabove_row[i]; - } - } - - if (xd->left_available) { - for (i = 0; i < 8; i++) { - Uaverage += uleft_col[i]; - Vaverage += vleft_col[i]; - } - } - - if (!xd->up_available && !xd->left_available) { - expected_udc = 128; - expected_vdc = 128; - } else { - shift = 2 + xd->up_available + xd->left_available; - expected_udc = (Uaverage + (1 << (shift - 1))) >> shift; - expected_vdc = (Vaverage + (1 << (shift - 1))) >> shift; - } - - - /*vpx_memset(upred_ptr,expected_udc,64);*/ - /*vpx_memset(vpred_ptr,expected_vdc,64);*/ - for (i = 0; i < 8; i++) { - vpx_memset(upred_ptr, expected_udc, 
8); - vpx_memset(vpred_ptr, expected_vdc, 8); - upred_ptr += uv_stride; /*8;*/ - vpred_ptr += uv_stride; /*8;*/ - } - } - break; - case V_PRED: { - int i; - - for (i = 0; i < 8; i++) { - vpx_memcpy(upred_ptr, uabove_row, 8); - vpx_memcpy(vpred_ptr, vabove_row, 8); - upred_ptr += uv_stride; /*8;*/ - vpred_ptr += uv_stride; /*8;*/ - } - - } - break; - case H_PRED: { - int i; - - for (i = 0; i < 8; i++) { - vpx_memset(upred_ptr, uleft_col[i], 8); - vpx_memset(vpred_ptr, vleft_col[i], 8); - upred_ptr += uv_stride; /*8;*/ - vpred_ptr += uv_stride; /*8;*/ - } - } - - break; - case TM_PRED: { - int i; - - for (i = 0; i < 8; i++) { - for (j = 0; j < 8; j++) { - int predu = uleft_col[i] + uabove_row[j] - utop_left; - int predv = vleft_col[i] + vabove_row[j] - vtop_left; - - if (predu < 0) - predu = 0; - - if (predu > 255) - predu = 255; - - if (predv < 0) - predv = 0; - - if (predv > 255) - predv = 255; - - upred_ptr[j] = predu; - vpred_ptr[j] = predv; - } - - upred_ptr += uv_stride; /*8;*/ - vpred_ptr += uv_stride; /*8;*/ - } - - } - break; - case D45_PRED: { - d45_predictor(upred_ptr, uv_stride, 8, uabove_row, uleft_col); - d45_predictor(vpred_ptr, uv_stride, 8, vabove_row, vleft_col); - } - break; - case D135_PRED: { - d135_predictor(upred_ptr, uv_stride, 8, uabove_row, uleft_col); - d135_predictor(vpred_ptr, uv_stride, 8, vabove_row, vleft_col); - } - break; - case D117_PRED: { - d117_predictor(upred_ptr, uv_stride, 8, uabove_row, uleft_col); - d117_predictor(vpred_ptr, uv_stride, 8, vabove_row, vleft_col); - } - break; - case D153_PRED: { - d153_predictor(upred_ptr, uv_stride, 8, uabove_row, uleft_col); - d153_predictor(vpred_ptr, uv_stride, 8, vabove_row, vleft_col); - } - break; - case D27_PRED: { - d27_predictor(upred_ptr, uv_stride, 8, uabove_row, uleft_col); - d27_predictor(vpred_ptr, uv_stride, 8, vabove_row, vleft_col); - } - break; - case D63_PRED: { - d63_predictor(upred_ptr, uv_stride, 8, uabove_row, uleft_col); - d63_predictor(vpred_ptr, uv_stride, 8, vabove_row, vleft_col); - } - break; - case B_PRED: - case NEARESTMV: - case NEARMV: - case ZEROMV: - case NEWMV: - case SPLITMV: - case MB_MODE_COUNT: - break; - } + int mode, int bsize) { + vp8_build_intra_predictors_internal(xd, xd->dst.u_buffer, xd->dst.uv_stride, + upred_ptr, uv_stride, mode, bsize); + vp8_build_intra_predictors_internal(xd, xd->dst.v_buffer, xd->dst.uv_stride, + vpred_ptr, uv_stride, mode, bsize); } void vp8_build_intra_predictors_mbuv(MACROBLOCKD *xd) { - vp8_build_intra_predictors_mbuv_internal( - xd, &xd->predictor[256], &xd->predictor[320], - 8, xd->mode_info_context->mbmi.uv_mode); + vp8_build_intra_predictors_mbuv_internal(xd, &xd->predictor[256], + &xd->predictor[320], 8, + xd->mode_info_context->mbmi.uv_mode, + 8); } void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *xd) { - vp8_build_intra_predictors_mbuv_internal( - xd, xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.uv_stride, xd->mode_info_context->mbmi.uv_mode); + vp8_build_intra_predictors_mbuv_internal(xd, xd->dst.u_buffer, + xd->dst.v_buffer, + xd->dst.uv_stride, + xd->mode_info_context->mbmi.uv_mode, + 8); } +#if CONFIG_SUPERBLOCKS +void vp8_build_intra_predictors_sbuv_s(MACROBLOCKD *xd) { + vp8_build_intra_predictors_mbuv_internal(xd, xd->dst.u_buffer, + xd->dst.v_buffer, xd->dst.uv_stride, + xd->mode_info_context->mbmi.uv_mode, + 16); +} +#endif + #if CONFIG_COMP_INTRA_PRED void vp8_build_comp_intra_predictors_mbuv(MACROBLOCKD *xd) { unsigned char predictor[2][2][64]; @@ -541,7 +417,8 @@ void vp8_build_comp_intra_predictors_mbuv(MACROBLOCKD *xd) 
{ xd->mode_info_context->mbmi.second_uv_mode); for (i = 0; i < 64; i++) { xd->predictor[256 + i] = (predictor[0][0][i] + predictor[0][1][i] + 1) >> 1; - xd->predictor[256 + 64 + i] = (predictor[1][0][i] + predictor[1][1][i] + 1) >> 1; + xd->predictor[256 + 64 + i] = (predictor[1][0][i] + + predictor[1][1][i] + 1) >> 1; } } #endif diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c index 94826ef6c6941760c45ab5ede76d50f76faecd7a..5e0600c2d1fe9e5f1e73fbc2624b7360460ea86f 100644 --- a/vp8/decoder/decodemv.c +++ b/vp8/decoder/decodemv.c @@ -29,34 +29,31 @@ int dec_mvcount = 0; #endif static int vp8_read_bmode(vp8_reader *bc, const vp8_prob *p) { - const int i = vp8_treed_read(bc, vp8_bmode_tree, p); - - return i; + return vp8_treed_read(bc, vp8_bmode_tree, p); } static int vp8_read_ymode(vp8_reader *bc, const vp8_prob *p) { - const int i = vp8_treed_read(bc, vp8_ymode_tree, p); + return vp8_treed_read(bc, vp8_ymode_tree, p); +} - return i; +#if CONFIG_SUPERBLOCKS +static int vp8_sb_kfread_ymode(vp8_reader *bc, const vp8_prob *p) { + return vp8_treed_read(bc, vp8_uv_mode_tree, p); } +#endif static int vp8_kfread_ymode(vp8_reader *bc, const vp8_prob *p) { - const int i = vp8_treed_read(bc, vp8_kf_ymode_tree, p); - - return i; + return vp8_treed_read(bc, vp8_kf_ymode_tree, p); } -static int vp8_read_i8x8_mode(vp8_reader *bc, const vp8_prob *p) { - const int i = vp8_treed_read(bc, vp8_i8x8_mode_tree, p); - return i; +static int vp8_read_i8x8_mode(vp8_reader *bc, const vp8_prob *p) { + return vp8_treed_read(bc, vp8_i8x8_mode_tree, p); } static int vp8_read_uv_mode(vp8_reader *bc, const vp8_prob *p) { - const int i = vp8_treed_read(bc, vp8_uv_mode_tree, p); - - return i; + return vp8_treed_read(bc, vp8_uv_mode_tree, p); } // This function reads the current macro block's segment id from the bitstream @@ -112,8 +109,14 @@ static void vp8_kfread_modes(VP8D_COMP *pbi, m->mbmi.mb_skip_coeff = 0; } +#if CONFIG_SUPERBLOCKS + if (m->mbmi.encoded_as_sb) { + y_mode = (MB_PREDICTION_MODE) vp8_sb_kfread_ymode(bc, + pbi->common.sb_kf_ymode_prob[pbi->common.kf_ymode_probs_index]); + } else +#endif y_mode = (MB_PREDICTION_MODE) vp8_kfread_ymode(bc, - pbi->common.kf_ymode_prob[pbi->common.kf_ymode_probs_index]); + pbi->common.kf_ymode_prob[pbi->common.kf_ymode_probs_index]); #if CONFIG_COMP_INTRA_PRED m->mbmi.second_mode = (MB_PREDICTION_MODE)(DC_PRED - 1); #endif @@ -398,16 +401,18 @@ static MV_REFERENCE_FRAME read_ref_frame(VP8D_COMP *pbi, return (MV_REFERENCE_FRAME)ref_frame; } -static MB_PREDICTION_MODE read_mv_ref(vp8_reader *bc, const vp8_prob *p) { - const int i = vp8_treed_read(bc, vp8_mv_ref_tree, p); +#if CONFIG_SUPERBLOCKS +static MB_PREDICTION_MODE read_sb_mv_ref(vp8_reader *bc, const vp8_prob *p) { + return (MB_PREDICTION_MODE) vp8_treed_read(bc, vp8_sb_mv_ref_tree, p); +} +#endif - return (MB_PREDICTION_MODE)i; +static MB_PREDICTION_MODE read_mv_ref(vp8_reader *bc, const vp8_prob *p) { + return (MB_PREDICTION_MODE) vp8_treed_read(bc, vp8_mv_ref_tree, p); } static B_PREDICTION_MODE sub_mv_ref(vp8_reader *bc, const vp8_prob *p) { - const int i = vp8_treed_read(bc, vp8_sub_mv_ref_tree, p); - - return (B_PREDICTION_MODE)i; + return (B_PREDICTION_MODE) vp8_treed_read(bc, vp8_sub_mv_ref_tree, p); } #ifdef VPX_MODE_COUNT @@ -537,15 +542,36 @@ static void read_mb_segment_id(VP8D_COMP *pbi, // Else .... 
decode it explicitly else { vp8_read_mb_segid(bc, mbmi, xd); - cm->last_frame_seg_map[index] = mbmi->segment_id; } - } // Normal unpredicted coding mode else { vp8_read_mb_segid(bc, mbmi, xd); + } +#if CONFIG_SUPERBLOCKS + if (mbmi->encoded_as_sb) { + cm->last_frame_seg_map[index] = + cm->last_frame_seg_map[index + 1] = + cm->last_frame_seg_map[index + cm->mb_cols] = + cm->last_frame_seg_map[index + cm->mb_cols + 1] = mbmi->segment_id; + } else +#endif + { cm->last_frame_seg_map[index] = mbmi->segment_id; } + } else { +#if CONFIG_SUPERBLOCKS + if (mbmi->encoded_as_sb) { + mbmi->segment_id = + cm->last_frame_seg_map[index] && + cm->last_frame_seg_map[index + 1] && + cm->last_frame_seg_map[index + cm->mb_cols] && + cm->last_frame_seg_map[index + cm->mb_cols + 1]; + } else +#endif + { + mbmi->segment_id = cm->last_frame_seg_map[index]; + } } } else { // The encoder explicitly sets the segment_id to 0 @@ -667,6 +693,11 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, mbmi->mode = get_segdata(xd, mbmi->segment_id, SEG_LVL_MODE); } else { +#if CONFIG_SUPERBLOCKS + if (mbmi->encoded_as_sb) { + mbmi->mode = read_sb_mv_ref(bc, mv_ref_p); + } else +#endif mbmi->mode = read_mv_ref(bc, mv_ref_p); vp8_accum_mv_refs(&pbi->common, mbmi->mode, rct); @@ -963,6 +994,7 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, mbmi->mode = (MB_PREDICTION_MODE) get_segdata(xd, mbmi->segment_id, SEG_LVL_MODE); else { + // FIXME write using SB mode tree mbmi->mode = (MB_PREDICTION_MODE) vp8_read_ymode(bc, pbi->common.fc.ymode_prob); pbi->common.fc.ymode_counts[mbmi->mode]++; @@ -1045,6 +1077,9 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi) { int mb_row = (sb_row << 1); for (sb_col = 0; sb_col < sb_cols; sb_col++) { +#if CONFIG_SUPERBLOCKS + mi->mbmi.encoded_as_sb = vp8_read(&pbi->bc, cm->sb_coded); +#endif for (i = 0; i < 4; i++) { int dy = row_delta[i]; @@ -1059,6 +1094,10 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi) { prev_mi += offset_extended; continue; } +#if CONFIG_SUPERBLOCKS + if (i) + mi->mbmi.encoded_as_sb = 0; +#endif // Make sure the MacroBlockD mode info pointer is set correctly xd->mode_info_context = mi; @@ -1074,6 +1113,18 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi) { read_mb_modes_mv(pbi, mi, &mi->mbmi, prev_mi, mb_row, mb_col); +#if CONFIG_SUPERBLOCKS + if (mi->mbmi.encoded_as_sb) { + assert(!i); + mb_col += 2; + mi[1] = mi[cm->mode_info_stride] = + mi[cm->mode_info_stride + 1] = mi[0]; + mi += 2; + prev_mi += 2; + break; + } +#endif + /* next macroblock */ mb_row += dy; mb_col += dx; diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c index 6ff91489352658e1d45c3f900753eb74d311d43b..61d3c8d2c4989ab36f70412a5a5d5a5a27cf7a43 100644 --- a/vp8/decoder/decodframe.c +++ b/vp8/decoder/decodframe.c @@ -175,10 +175,27 @@ void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd) { */ static void skip_recon_mb(VP8D_COMP *pbi, MACROBLOCKD *xd) { if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { +#if CONFIG_SUPERBLOCKS + if (xd->mode_info_context->mbmi.encoded_as_sb) { + RECON_INVOKE(&pbi->common.rtcd.recon, build_intra_predictors_sbuv_s)(xd); + RECON_INVOKE(&pbi->common.rtcd.recon, + build_intra_predictors_sby_s)(xd); + } else { +#endif RECON_INVOKE(&pbi->common.rtcd.recon, build_intra_predictors_mbuv_s)(xd); RECON_INVOKE(&pbi->common.rtcd.recon, build_intra_predictors_mby_s)(xd); +#if CONFIG_SUPERBLOCKS + } +#endif } else { +#if CONFIG_SUPERBLOCKS + if (xd->mode_info_context->mbmi.encoded_as_sb) { + 
vp8_build_inter32x32_predictors_sb(xd, xd->dst.y_buffer, + xd->dst.u_buffer, xd->dst.v_buffer, + xd->dst.y_stride, xd->dst.uv_stride); + } else { +#endif vp8_build_1st_inter16x16_predictors_mb(xd, xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.y_stride, xd->dst.uv_stride); @@ -188,6 +205,9 @@ static void skip_recon_mb(VP8D_COMP *pbi, MACROBLOCKD *xd) { xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.y_stride, xd->dst.uv_stride); } +#if CONFIG_SUPERBLOCKS + } +#endif } #ifdef DEC_DEBUG if (dec_debug) { @@ -204,11 +224,15 @@ static void skip_recon_mb(VP8D_COMP *pbi, MACROBLOCKD *xd) { extern const int vp8_i8x8_block[4]; static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, - unsigned int mb_idx) { + unsigned int mb_col) { int eobtotal = 0; MB_PREDICTION_MODE mode; int i; int tx_type; +#if CONFIG_SUPERBLOCKS + VP8_COMMON *pc = &pbi->common; + int orig_skip_flag = xd->mode_info_context->mbmi.mb_skip_coeff; +#endif #if CONFIG_HYBRIDTRANSFORM int QIndex = xd->q_index; @@ -264,11 +288,25 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, xd->mode_info_context->mbmi.txfm_size = TX_8X8; } #endif +#if CONFIG_SUPERBLOCKS + if (xd->mode_info_context->mbmi.encoded_as_sb) { + xd->mode_info_context->mbmi.txfm_size = TX_8X8; + } +#endif tx_type = xd->mode_info_context->mbmi.txfm_size; if (xd->mode_info_context->mbmi.mb_skip_coeff) { vp8_reset_mb_tokens_context(xd); +#if CONFIG_SUPERBLOCKS + if (xd->mode_info_context->mbmi.encoded_as_sb) { + xd->above_context++; + xd->left_context++; + vp8_reset_mb_tokens_context(xd); + xd->above_context--; + xd->left_context--; + } +#endif } else if (!vp8dx_bool_error(xd->current_bc)) { for (i = 0; i < 25; i++) { xd->block[i].eob = 0; @@ -311,8 +349,13 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, * */ xd->mode_info_context->mbmi.mb_skip_coeff = 1; - skip_recon_mb(pbi, xd); - return; +#if CONFIG_SUPERBLOCKS + if (!xd->mode_info_context->mbmi.encoded_as_sb || orig_skip_flag) +#endif + { + skip_recon_mb(pbi, xd); + return; + } } #ifdef DEC_DEBUG @@ -343,6 +386,12 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, /* do prediction */ if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { +#if CONFIG_SUPERBLOCKS + if (xd->mode_info_context->mbmi.encoded_as_sb) { + RECON_INVOKE(&pbi->common.rtcd.recon, build_intra_predictors_sby_s)(xd); + RECON_INVOKE(&pbi->common.rtcd.recon, build_intra_predictors_sbuv_s)(xd); + } else +#endif if (mode != I8X8_PRED) { RECON_INVOKE(&pbi->common.rtcd.recon, build_intra_predictors_mbuv)(xd); if (mode != B_PRED) { @@ -358,6 +407,13 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, #endif } } else { +#if CONFIG_SUPERBLOCKS + if (xd->mode_info_context->mbmi.encoded_as_sb) { + vp8_build_inter32x32_predictors_sb(xd, xd->dst.y_buffer, + xd->dst.u_buffer, xd->dst.v_buffer, + xd->dst.y_stride, xd->dst.uv_stride); + } else +#endif vp8_build_inter_predictors_mb(xd); } @@ -481,6 +537,32 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, else #endif if (tx_type == TX_8X8) { +#if CONFIG_SUPERBLOCKS + void *orig = xd->mode_info_context; + int n, num = xd->mode_info_context->mbmi.encoded_as_sb ? 
4 : 1; + for (n = 0; n < num; n++) { + if (n != 0) { + for (i = 0; i < 25; i++) { + xd->block[i].eob = 0; + xd->eobs[i] = 0; + } + xd->above_context = pc->above_context + mb_col + (n & 1); + xd->left_context = pc->left_context + (n >> 1); + xd->mode_info_context = orig; + xd->mode_info_context += (n & 1); + xd->mode_info_context += (n >> 1) * pc->mode_info_stride; + if (!orig_skip_flag) { + eobtotal = vp8_decode_mb_tokens_8x8(pbi, xd); + if (eobtotal == 0) // skip loopfilter + xd->mode_info_context->mbmi.mb_skip_coeff = 1; + } else { + vp8_reset_mb_tokens_context(xd); + } + } + + if (xd->mode_info_context->mbmi.mb_skip_coeff) + continue; // only happens for SBs, which are already in dest buffer +#endif DEQUANT_INVOKE(&pbi->dequant, block_2x2)(b); #ifdef DEC_DEBUG if (dec_debug) { @@ -501,10 +583,27 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, ((int *)b->qcoeff)[5] = 0; ((int *)b->qcoeff)[6] = 0; ((int *)b->qcoeff)[7] = 0; - DEQUANT_INVOKE(&pbi->dequant, dc_idct_add_y_block_8x8) - (xd->qcoeff, xd->block[0].dequant, - xd->predictor, xd->dst.y_buffer, - xd->dst.y_stride, xd->eobs, xd->block[24].diff, xd); +#if CONFIG_SUPERBLOCKS + if (xd->mode_info_context->mbmi.encoded_as_sb) { + vp8_dequant_dc_idct_add_y_block_8x8_inplace_c(xd->qcoeff, + xd->block[0].dequant, + xd->dst.y_buffer + (n >> 1) * 16 * xd->dst.y_stride + (n & 1) * 16, + xd->dst.y_stride, xd->eobs, xd->block[24].diff, xd); + // do UV inline also + vp8_dequant_idct_add_uv_block_8x8_inplace_c(xd->qcoeff + 16 * 16, + xd->block[16].dequant, + xd->dst.u_buffer + (n >> 1) * 8 * xd->dst.uv_stride + (n & 1) * 8, + xd->dst.v_buffer + (n >> 1) * 8 * xd->dst.uv_stride + (n & 1) * 8, + xd->dst.uv_stride, xd->eobs + 16, xd); + } else +#endif + DEQUANT_INVOKE(&pbi->dequant, dc_idct_add_y_block_8x8)(xd->qcoeff, + xd->block[0].dequant, xd->predictor, xd->dst.y_buffer, + xd->dst.y_stride, xd->eobs, xd->block[24].diff, xd); +#if CONFIG_SUPERBLOCKS + } + xd->mode_info_context = orig; +#endif } else { DEQUANT_INVOKE(&pbi->dequant, block)(b); if (xd->eobs[24] > 1) { @@ -529,7 +628,10 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, } } - if (tx_type == TX_8X8 +#if CONFIG_SUPERBLOCKS + if (!xd->mode_info_context->mbmi.encoded_as_sb) { +#endif + if (tx_type == TX_8X8 #if CONFIG_TX16X16 || tx_type == TX_16X16 #endif @@ -543,6 +645,9 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, (xd->qcoeff + 16 * 16, xd->block[16].dequant, xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride, xd->eobs + 16); +#if CONFIG_SUPERBLOCKS + } +#endif } @@ -582,15 +687,21 @@ decode_sb_row(VP8D_COMP *pbi, VP8_COMMON *pc, int mbrow, MACROBLOCKD *xd) { int row_delta[4] = { 0, +1, 0, -1}; int col_delta[4] = { +1, -1, +1, +1}; int sb_cols = (pc->mb_cols + 1) >> 1; - ENTROPY_CONTEXT_PLANES left_context[2]; // For a SB there are 2 left contexts, each pertaining to a MB row within - vpx_memset(left_context, 0, sizeof(left_context)); + vpx_memset(pc->left_context, 0, sizeof(pc->left_context)); mb_row = mbrow; mb_col = 0; for (sb_col = 0; sb_col < sb_cols; sb_col++) { + MODE_INFO *mi = xd->mode_info_context; + +#if CONFIG_SUPERBLOCKS + if (pbi->interleaved_decoding) + mi->mbmi.encoded_as_sb = vp8_read(&pbi->bc, pc->sb_coded); +#endif + // Process the 4 MBs within the SB in the order: // top-left, top-right, bottom-left, bottom-right for (i = 0; i < 4; i++) { @@ -598,6 +709,7 @@ decode_sb_row(VP8D_COMP *pbi, VP8_COMMON *pc, int mbrow, MACROBLOCKD *xd) { int dx = col_delta[i]; int offset_extended = dy * 
xd->mode_info_stride + dx; + mi = xd->mode_info_context; if ((mb_row >= pc->mb_rows) || (mb_col >= pc->mb_cols)) { // MB lies outside frame, skip on to next mb_row += dy; @@ -610,13 +722,10 @@ decode_sb_row(VP8D_COMP *pbi, VP8_COMMON *pc, int mbrow, MACROBLOCKD *xd) { #ifdef DEC_DEBUG dec_debug = (pc->current_video_frame == 0 && mb_row == 0 && mb_col == 0); #endif - // Copy in the appropriate left context for this MB row - vpx_memcpy(&pc->left_context, - &left_context[i >> 1], - sizeof(ENTROPY_CONTEXT_PLANES)); // Set above context pointer xd->above_context = pc->above_context + mb_col; + xd->left_context = pc->left_context + (i >> 1); /* Distance of Mb to the various image edges. * These are specified to 8th pel as they are always compared to @@ -639,6 +748,10 @@ decode_sb_row(VP8D_COMP *pbi, VP8_COMMON *pc, int mbrow, MACROBLOCKD *xd) { xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset; xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset; +#if CONFIG_SUPERBLOCKS + if (i) + mi->mbmi.encoded_as_sb = 0; +#endif if(pbi->interleaved_decoding) vpx_decode_mb_mode_mv(pbi, xd, mb_row, mb_col); @@ -681,15 +794,34 @@ decode_sb_row(VP8D_COMP *pbi, VP8_COMMON *pc, int mbrow, MACROBLOCKD *xd) { xd->corrupted |= pc->yv12_fb[ref_fb_idx].corrupted; } - decode_macroblock(pbi, xd, mb_row * pc->mb_cols + mb_col); +#if CONFIG_SUPERBLOCKS + if (xd->mode_info_context->mbmi.encoded_as_sb) { + mi[1] = mi[0]; + mi[pc->mode_info_stride] = mi[0]; + mi[pc->mode_info_stride + 1] = mi[0]; + } +#endif + decode_macroblock(pbi, xd, mb_col); +#if CONFIG_SUPERBLOCKS + if (xd->mode_info_context->mbmi.encoded_as_sb) { + mi[1].mbmi.txfm_size = mi[0].mbmi.txfm_size; + mi[pc->mode_info_stride].mbmi.txfm_size = mi[0].mbmi.txfm_size; + mi[pc->mode_info_stride + 1].mbmi.txfm_size = mi[0].mbmi.txfm_size; + } +#endif /* check if the boolean decoder has suffered an error */ xd->corrupted |= vp8dx_bool_error(xd->current_bc); - // Store the modified left context for the MB row locally - vpx_memcpy(&left_context[i >> 1], - &pc->left_context, - sizeof(ENTROPY_CONTEXT_PLANES)); +#if CONFIG_SUPERBLOCKS + if (mi->mbmi.encoded_as_sb) { + assert(!i); + mb_col += 2; + xd->mode_info_context += 2; + xd->prev_mode_info_context += 2; + break; + } +#endif // skip to next MB xd->mode_info_context += offset_extended; @@ -806,7 +938,6 @@ static void init_frame(VP8D_COMP *pbi) { vp8_setup_interp_filters(xd, pc->mcomp_filter_type, pc); } - xd->left_context = &pc->left_context; xd->mode_info_context = pc->mi; xd->prev_mode_info_context = pc->prev_mi; xd->frame_type = pc->frame_type; @@ -1151,6 +1282,10 @@ int vp8_decode_frame(VP8D_COMP *pbi) { } } +#if CONFIG_SUPERBLOCKS + pc->sb_coded = vp8_read_literal(bc, 8); +#endif + /* Read the loop filter level and type */ pc->txfm_mode = (TXFM_MODE) vp8_read_bit(bc); diff --git a/vp8/decoder/idct_blk.c b/vp8/decoder/idct_blk.c index 36eea5d6f7d996ffd4045c96a81b110d9b3f2298..e97d3298f5c35f4537eba81227492f56fb555273 100644 --- a/vp8/decoder/idct_blk.c +++ b/vp8/decoder/idct_blk.c @@ -127,6 +127,19 @@ void vp8_dequant_dc_idct_add_y_block_8x8_c } +#if CONFIG_SUPERBLOCKS +void vp8_dequant_dc_idct_add_y_block_8x8_inplace_c +(short *q, short *dq, + unsigned char *dst, int stride, char *eobs, short *dc, MACROBLOCKD *xd) { + + vp8_dequant_dc_idct_add_8x8_c(q, dq, dst, dst, stride, stride, dc[0]); + vp8_dequant_dc_idct_add_8x8_c(&q[64], dq, dst + 8, dst + 8, stride, stride, dc[1]); + vp8_dequant_dc_idct_add_8x8_c(&q[128], dq, dst + 8 * stride, dst + 8 * stride, stride, stride, dc[4]); + 
vp8_dequant_dc_idct_add_8x8_c(&q[192], dq, dst + 8 * stride + 8, dst + 8 * stride + 8, stride, stride, dc[8]); + +} +#endif + void vp8_dequant_idct_add_y_block_8x8_c (short *q, short *dq, unsigned char *pre, unsigned char *dst, int stride, char *eobs, MACROBLOCKD *xd) { @@ -153,6 +166,18 @@ void vp8_dequant_idct_add_uv_block_8x8_c vp8_dequant_idct_add_8x8_c(q, dq, pre, dstv, 8, stride); } +#if CONFIG_SUPERBLOCKS +void vp8_dequant_idct_add_uv_block_8x8_inplace_c +(short *q, short *dq, + unsigned char *dstu, unsigned char *dstv, int stride, char *eobs, MACROBLOCKD *xd) { + vp8_dequant_idct_add_8x8_c(q, dq, dstu, dstu, stride, stride); + + q += 64; + + vp8_dequant_idct_add_8x8_c(q, dq, dstv, dstv, stride, stride); +} +#endif + #if CONFIG_LOSSLESS void vp8_dequant_dc_idct_add_y_block_lossless_c (short *q, short *dq, unsigned char *pre, diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c index 63499a8f7e68b9d2c379efa914f4803e12e05584..2e1364817fdf5cb8ac0236d80f7f9a69b38dcb84 100644 --- a/vp8/decoder/onyxd_if.c +++ b/vp8/decoder/onyxd_if.c @@ -149,7 +149,7 @@ VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf) { pbi->decoded_key_frame = 0; - pbi->interleaved_decoding = CONFIG_NEWBESTREFMV; + pbi->interleaved_decoding = CONFIG_NEWBESTREFMV || CONFIG_SUPERBLOCKS; return (VP8D_PTR) pbi; } diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c index 76aed7e2d908af7a5f0866b542ca498ce506c2eb..90bc8e98776eaa0812fc4228dc4a4a499bf59c9f 100644 --- a/vp8/encoder/bitstream.c +++ b/vp8/encoder/bitstream.c @@ -288,6 +288,12 @@ static void kfwrite_ymode(vp8_writer *bc, int m, const vp8_prob *p) { vp8_write_token(bc, vp8_kf_ymode_tree, p, vp8_kf_ymode_encodings + m); } +#if CONFIG_SUPERBLOCKS +static void sb_kfwrite_ymode(vp8_writer *bc, int m, const vp8_prob *p) { + vp8_write_token(bc, vp8_uv_mode_tree, p, vp8_sb_kf_ymode_encodings + m); +} +#endif + static void write_i8x8_mode(vp8_writer *bc, int m, const vp8_prob *p) { vp8_write_token(bc, vp8_i8x8_mode_tree, p, vp8_i8x8_mode_encodings + m); } @@ -533,6 +539,16 @@ static void write_mv_ref vp8_mv_ref_encoding_array - NEARESTMV + m); } +#if CONFIG_SUPERBLOCKS +static void write_sb_mv_ref(vp8_writer *w, MB_PREDICTION_MODE m, const vp8_prob *p) { +#if CONFIG_DEBUG + assert(NEARESTMV <= m && m < SPLITMV); +#endif + vp8_write_token(w, vp8_sb_mv_ref_tree, p, + vp8_sb_mv_ref_encoding_array - NEARESTMV + m); +} +#endif + static void write_sub_mv_ref ( vp8_writer *w, B_PREDICTION_MODE m, const vp8_prob *p @@ -810,6 +826,9 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) { // Process the 4 MBs in the order: // top-left, top-right, bottom-left, bottom-right +#if CONFIG_SUPERBLOCKS + vp8_write(w, m->mbmi.encoded_as_sb, pc->sb_coded); +#endif for (i = 0; i < 4; i++) { MB_MODE_INFO *mi; MV_REFERENCE_FRAME rf; @@ -872,7 +891,15 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) { if (pc->mb_no_coeff_skip && (!segfeature_active(xd, segment_id, SEG_LVL_EOB) || (get_segdata(xd, segment_id, SEG_LVL_EOB) != 0))) { - vp8_encode_bool(w, mi->mb_skip_coeff, + int skip_coeff = mi->mb_skip_coeff; +#if CONFIG_SUPERBLOCKS + if (mi->encoded_as_sb) { + skip_coeff &= m[1].mbmi.mb_skip_coeff; + skip_coeff &= m[mis].mbmi.mb_skip_coeff; + skip_coeff &= m[mis + 1].mbmi.mb_skip_coeff; + } +#endif + vp8_encode_bool(w, skip_coeff, get_pred_prob(pc, xd, PRED_MBSKIP)); } @@ -884,6 +911,8 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) { active_section = 6; #endif + // TODO(rbultje) write using SB tree structure + if (!segfeature_active(xd, segment_id, 
SEG_LVL_MODE)) { write_ymode(w, mode, pc->fc.ymode_prob); } @@ -949,7 +978,14 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) { // Is the segment coding of mode enabled if (!segfeature_active(xd, segment_id, SEG_LVL_MODE)) { - write_mv_ref(w, mode, mv_ref_p); +#if CONFIG_SUPERBLOCKS + if (mi->encoded_as_sb) { + write_sb_mv_ref(w, mode, mv_ref_p); + } else +#endif + { + write_mv_ref(w, mode, mv_ref_p); + } vp8_accum_mv_refs(&cpi->common, mode, ct); } @@ -1085,6 +1121,17 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) { } } +#if CONFIG_SUPERBLOCKS + if (m->mbmi.encoded_as_sb) { + assert(!i); + mb_col += 2; + m += 2; + cpi->mb.partition_info += 2; + prev_m += 2; + break; + } +#endif + // Next MB mb_row += dy; mb_col += dx; @@ -1151,6 +1198,9 @@ static void write_kfmodes(VP8_COMP *cpi) { mb_col = 0; for (col = 0; col < c->mb_cols; col += 2) { +#if CONFIG_SUPERBLOCKS + vp8_write(bc, m->mbmi.encoded_as_sb, c->sb_coded); +#endif // Process the 4 MBs in the order: // top-left, top-right, bottom-left, bottom-right for (i = 0; i < 4; i++) { @@ -1181,11 +1231,27 @@ static void write_kfmodes(VP8_COMP *cpi) { if (c->mb_no_coeff_skip && (!segfeature_active(xd, segment_id, SEG_LVL_EOB) || (get_segdata(xd, segment_id, SEG_LVL_EOB) != 0))) { - vp8_encode_bool(bc, m->mbmi.mb_skip_coeff, + int skip_coeff = m->mbmi.mb_skip_coeff; +#if CONFIG_SUPERBLOCKS + if (m->mbmi.encoded_as_sb) { + skip_coeff &= m[1].mbmi.mb_skip_coeff; + skip_coeff &= m[mis].mbmi.mb_skip_coeff; + skip_coeff &= m[mis + 1].mbmi.mb_skip_coeff; + } +#endif + vp8_encode_bool(bc, skip_coeff, get_pred_prob(c, xd, PRED_MBSKIP)); } - kfwrite_ymode(bc, ym, - c->kf_ymode_prob[c->kf_ymode_probs_index]); +#if CONFIG_SUPERBLOCKS + if (m->mbmi.encoded_as_sb) { + sb_kfwrite_ymode(bc, ym, + c->sb_kf_ymode_prob[c->kf_ymode_probs_index]); + } else +#endif + { + kfwrite_ymode(bc, ym, + c->kf_ymode_prob[c->kf_ymode_probs_index]); + } if (ym == B_PRED) { const int mis = c->mode_info_stride; @@ -1233,6 +1299,14 @@ static void write_kfmodes(VP8_COMP *cpi) { } else write_uv_mode(bc, m->mbmi.uv_mode, c->kf_uv_mode_prob[ym]); +#if CONFIG_SUPERBLOCKS + if (m->mbmi.encoded_as_sb) { + assert(!i); + mb_col += 2; + m += 2; + break; + } +#endif // Next MB mb_row += dy; mb_col += dx; @@ -1793,7 +1867,7 @@ static void put_delta_q(vp8_writer *bc, int delta_q) { } else vp8_write_bit(bc, 0); } -extern const unsigned int kf_y_mode_cts[8][VP8_YMODES]; + static void decide_kf_ymode_entropy(VP8_COMP *cpi) { int mode_cost[MB_MODE_COUNT]; @@ -1808,6 +1882,13 @@ static void decide_kf_ymode_entropy(VP8_COMP *cpi) { for (j = 0; j < VP8_YMODES; j++) { cost += mode_cost[j] * cpi->ymode_count[j]; } +#if CONFIG_SUPERBLOCKS + vp8_cost_tokens(mode_cost, cpi->common.sb_kf_ymode_prob[i], + vp8_sb_ymode_tree); + for (j = 0; j < VP8_I32X32_MODES; j++) { + cost += mode_cost[j] * cpi->sb_ymode_count[j]; + } +#endif if (cost < bestcost) { bestindex = i; bestcost = cost; @@ -1906,11 +1987,6 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size) // Select the coding strategy (temporal or spatial) choose_segmap_coding_method(cpi); - // Take a copy of the segment map if it changed for - // future comparison - vpx_memcpy(pc->last_frame_seg_map, - cpi->segmentation_map, pc->MBs); - // Write out the chosen coding method. vp8_write_bit(bc, (pc->temporal_update) ? 
1 : 0); } @@ -2048,6 +2124,19 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size) } } +#if CONFIG_SUPERBLOCKS + { + /* sb mode probability */ + int sb_coded = 256 - (cpi->sb_count << 8) / (((pc->mb_rows + 1) >> 1) * ((pc->mb_cols + 1) >> 1)); + if (sb_coded <= 0) + sb_coded = 1; + else if (sb_coded >= 256) + sb_coded = 255; + pc->sb_coded = sb_coded; + vp8_write_literal(bc, pc->sb_coded, 8); + } +#endif + vp8_write_bit(bc, pc->txfm_mode); // Encode the loop filter level and type diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h index dfc1d743ec585cc7fa6dc0a2ef5f7c24bb891d2b..d73af4faafd241ff13e5dbf91ed487bbc87dbe99 100644 --- a/vp8/encoder/block.h +++ b/vp8/encoder/block.h @@ -82,7 +82,9 @@ typedef struct { int best_mode_index; int rddiv; int rdmult; - + int hybrid_pred_diff; + int comp_pred_diff; + int single_pred_diff; } PICK_MODE_CONTEXT; typedef struct { @@ -139,12 +141,6 @@ typedef struct { int mv_col_max; int mv_row_min; int mv_row_max; -#if CONFIG_SUPERBLOCKS - int mv_col_min_sb; - int mv_col_max_sb; - int mv_row_min_sb; - int mv_row_max_sb; -#endif int skip; @@ -163,8 +159,6 @@ typedef struct { int optimize; int q_index; - int encode_as_sb; - // Structure to hold context for each of the 4 MBs within a SB: // when encoded as 4 independent MBs: PICK_MODE_CONTEXT mb_context[4]; diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c index e58c852a7b5288591efb6f9b38d19b3398327478..4472497e0b064254041fa3e4db60ea5fc828a95e 100644 --- a/vp8/encoder/encodeframe.c +++ b/vp8/encoder/encodeframe.c @@ -57,16 +57,24 @@ extern void vp8cx_init_mbrthread_data(VP8_COMP *cpi, MB_ROW_COMP *mbr_ei, int mb_row, int count); -extern int vp8cx_pick_mode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, +int64_t vp8_rd_pick_inter_mode_sb(VP8_COMP *cpi, MACROBLOCK *x, + int recon_yoffset, int recon_uvoffset, + int *returnrate, int *returndistortion); +extern void vp8cx_pick_mode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - int recon_uvoffset); + int recon_uvoffset, int *r, int *d); void vp8_build_block_offsets(MACROBLOCK *x); void vp8_setup_block_ptrs(MACROBLOCK *x); void vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, int recon_yoffset, int recon_uvoffset, int output_enabled); +void vp8cx_encode_inter_superblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, + int recon_yoffset, int recon_uvoffset, int mb_col, int mb_row); void vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, int output_enabled); +void vp8cx_encode_intra_super_block(VP8_COMP *cpi, + MACROBLOCK *x, + TOKENEXTRA **t, int mb_col); static void adjust_act_zbin(VP8_COMP *cpi, MACROBLOCK *x); @@ -378,6 +386,13 @@ static void update_state(VP8_COMP *cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { // Restore the coding context of the MB to that that was in place // when the mode was picked for it vpx_memcpy(xd->mode_info_context, mi, sizeof(MODE_INFO)); +#if CONFIG_SUPERBLOCKS + if (mi->mbmi.encoded_as_sb) { + vpx_memcpy(xd->mode_info_context + 1, mi, sizeof(MODE_INFO)); + vpx_memcpy(xd->mode_info_context + cpi->common.mode_info_stride, mi, sizeof(MODE_INFO)); + vpx_memcpy(xd->mode_info_context + cpi->common.mode_info_stride + 1, mi, sizeof(MODE_INFO)); + } +#endif if (mb_mode == B_PRED) { for (i = 0; i < 16; i++) { @@ -448,6 +463,10 @@ static void update_state(VP8_COMP *cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { cpi->prediction_error += ctx->distortion; cpi->intra_error += ctx->intra_error; + + cpi->rd_comp_pred_diff[0] += 
ctx->single_pred_diff; + cpi->rd_comp_pred_diff[1] += ctx->comp_pred_diff; + cpi->rd_comp_pred_diff[2] += ctx->hybrid_pred_diff; } } @@ -458,7 +477,8 @@ static void pick_mb_modes(VP8_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, TOKENEXTRA **tp, - int *totalrate) { + int *totalrate, + int *totaldist) { int i; int map_index; int recon_yoffset, recon_uvoffset; @@ -477,7 +497,7 @@ static void pick_mb_modes(VP8_COMP *cpi, /* Function should not modify L & A contexts; save and restore on exit */ vpx_memcpy(left_context, - cpi->left_context, + cm->left_context, sizeof(left_context)); vpx_memcpy(above_context, initial_above_context_ptr, @@ -525,9 +545,7 @@ static void pick_mb_modes(VP8_COMP *cpi, // Restore the appropriate left context depending on which // row in the SB the MB is situated - vpx_memcpy(&cm->left_context, - &cpi->left_context[i >> 1], - sizeof(ENTROPY_CONTEXT_PLANES)); + xd->left_context = cm->left_context + (i >> 1); // Set up distance of MB to edge of frame in 1/8th pel units xd->mb_to_top_edge = -((mb_row * 16) << 3); @@ -568,9 +586,11 @@ static void pick_mb_modes(VP8_COMP *cpi, // Is segmentation enabled if (xd->segmentation_enabled) { // Code to set segment id in xd->mbmi.segment_id - if (cpi->segmentation_map[map_index] <= 3) + if (xd->update_mb_segmentation_map) mbmi->segment_id = cpi->segmentation_map[map_index]; else + mbmi->segment_id = cm->last_frame_seg_map[map_index]; + if (mbmi->segment_id > 3) mbmi->segment_id = 0; vp8cx_mb_init_quantizer(cpi, x); @@ -583,22 +603,29 @@ static void pick_mb_modes(VP8_COMP *cpi, /* force 4x4 transform for mode selection */ mbmi->txfm_size = TX_4X4; // TODO IS this right?? +#if CONFIG_SUPERBLOCKS + xd->mode_info_context->mbmi.encoded_as_sb = 0; +#endif + cpi->update_context = 0; // TODO Do we need this now?? 
// Find best coding mode & reconstruct the MB so it is available // as a predictor for MBs that follow in the SB if (cm->frame_type == KEY_FRAME) { - *totalrate += vp8_rd_pick_intra_mode(cpi, x); - - // Save the coding context - vpx_memcpy(&x->mb_context[i].mic, xd->mode_info_context, - sizeof(MODE_INFO)); + int r, d; + vp8_rd_pick_intra_mode(cpi, x, &r, &d); + *totalrate += r; + *totaldist += d; // Dummy encode, do not do the tokenization vp8cx_encode_intra_macro_block(cpi, x, tp, 0); // Note the encoder may have changed the segment_id + + // Save the coding context + vpx_memcpy(&x->mb_context[i].mic, xd->mode_info_context, + sizeof(MODE_INFO)); } else { - int seg_id; + int seg_id, r, d; if (xd->segmentation_enabled && cpi->seg0_cnt > 0 && !segfeature_active(xd, 0, SEG_LVL_REF_FRAME) && @@ -612,9 +639,10 @@ static void pick_mb_modes(VP8_COMP *cpi, cpi->seg0_progress = (((mb_col & ~1) * 2 + (mb_row & ~1) * cm->mb_cols + i) << 16) / cm->MBs; } - *totalrate += vp8cx_pick_mode_inter_macroblock(cpi, x, - recon_yoffset, - recon_uvoffset); + vp8cx_pick_mode_inter_macroblock(cpi, x, recon_yoffset, + recon_uvoffset, &r, &d); + *totalrate += r; + *totaldist += d; // Dummy encode, do not do the tokenization vp8cx_encode_inter_macroblock(cpi, x, tp, @@ -639,11 +667,6 @@ static void pick_mb_modes(VP8_COMP *cpi, } } - // Keep a copy of the updated left context - vpx_memcpy(&cpi->left_context[i >> 1], - &cm->left_context, - sizeof(ENTROPY_CONTEXT_PLANES)); - // Next MB mb_row += dy; mb_col += dx; @@ -664,7 +687,7 @@ static void pick_mb_modes(VP8_COMP *cpi, } /* Restore L & A coding context to those in place on entry */ - vpx_memcpy(cpi->left_context, + vpx_memcpy(cm->left_context, left_context, sizeof(left_context)); vpx_memcpy(initial_above_context_ptr, @@ -672,6 +695,156 @@ static void pick_mb_modes(VP8_COMP *cpi, sizeof(above_context)); } +#if CONFIG_SUPERBLOCKS +static void pick_sb_modes (VP8_COMP *cpi, + VP8_COMMON *cm, + int mb_row, + int mb_col, + MACROBLOCK *x, + MACROBLOCKD *xd, + TOKENEXTRA **tp, + int *totalrate, + int *totaldist) +{ + int map_index; + int recon_yoffset, recon_uvoffset; + int ref_fb_idx = cm->lst_fb_idx; + int dst_fb_idx = cm->new_fb_idx; + int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride; + int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride; + ENTROPY_CONTEXT_PLANES left_context[2]; + ENTROPY_CONTEXT_PLANES above_context[2]; + ENTROPY_CONTEXT_PLANES *initial_above_context_ptr = cm->above_context + + mb_col; + + /* Function should not modify L & A contexts; save and restore on exit */ + vpx_memcpy (left_context, + cm->left_context, + sizeof(left_context)); + vpx_memcpy (above_context, + initial_above_context_ptr, + sizeof(above_context)); + + map_index = (mb_row * cpi->common.mb_cols) + mb_col; + x->mb_activity_ptr = &cpi->mb_activity_map[map_index]; + + /* set above context pointer */ + xd->above_context = cm->above_context + mb_col; + + /* Restore the appropriate left context depending on which + * row in the SB the MB is situated */ + xd->left_context = cm->left_context; + + // Set up distance of MB to edge of frame in 1/8th pel units + xd->mb_to_top_edge = -((mb_row * 16) << 3); + xd->mb_to_left_edge = -((mb_col * 16) << 3); + xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3; + xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3; + + /* Set up limit values for MV components to prevent them from + * extending beyond the UMV borders assuming 16x16 block size */ + x->mv_row_min = -((mb_row * 16) + VP8BORDERINPIXELS - INTERP_EXTEND); + 
x->mv_col_min = -((mb_col * 16) + VP8BORDERINPIXELS - INTERP_EXTEND); + x->mv_row_max = ((cm->mb_rows - mb_row) * 16 + + (VP8BORDERINPIXELS - 32 - INTERP_EXTEND)); + x->mv_col_max = ((cm->mb_cols - mb_col) * 16 + + (VP8BORDERINPIXELS - 32 - INTERP_EXTEND)); + + xd->up_available = (mb_row != 0); + xd->left_available = (mb_col != 0); + + recon_yoffset = (mb_row * recon_y_stride * 16) + (mb_col * 16); + recon_uvoffset = (mb_row * recon_uv_stride * 8) + (mb_col * 8); + + xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset; + xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset; + xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset; +#if 0 // FIXME + /* Copy current MB to a work buffer */ + RECON_INVOKE(&xd->rtcd->recon, copy16x16)(x->src.y_buffer, + x->src.y_stride, + x->thismb, 16); +#endif + x->rddiv = cpi->RDDIV; + x->rdmult = cpi->RDMULT; + if(cpi->oxcf.tuning == VP8_TUNE_SSIM) + vp8_activity_masking(cpi, x); + /* Is segmentation enabled */ + if (xd->segmentation_enabled) + { + /* Code to set segment id in xd->mbmi.segment_id */ + if (xd->update_mb_segmentation_map) + xd->mode_info_context->mbmi.segment_id = + cpi->segmentation_map[map_index] && + cpi->segmentation_map[map_index + 1] && + cpi->segmentation_map[map_index + cm->mb_cols] && + cpi->segmentation_map[map_index + cm->mb_cols + 1]; + else + xd->mode_info_context->mbmi.segment_id = + cm->last_frame_seg_map[map_index] && + cm->last_frame_seg_map[map_index + 1] && + cm->last_frame_seg_map[map_index + cm->mb_cols] && + cm->last_frame_seg_map[map_index + cm->mb_cols + 1]; + if (xd->mode_info_context->mbmi.segment_id > 3) + xd->mode_info_context->mbmi.segment_id = 0; + + vp8cx_mb_init_quantizer(cpi, x); + } + else + /* Set to Segment 0 by default */ + xd->mode_info_context->mbmi.segment_id = 0; + + x->active_ptr = cpi->active_map + map_index; + + cpi->update_context = 0; // TODO Do we need this now?? 
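+ /* NOTE: in the segment-id merge above, combining the four per-MB ids with
+  * logical '&&' yields only 0 or 1, not a shared segment id. If the intent
+  * is "use the common id when all four MBs of the SB agree", a sketch of
+  * that alternative (same names as above) would be:
+  *
+  *   int seg = cpi->segmentation_map[map_index];
+  *   if (cpi->segmentation_map[map_index + 1] != seg ||
+  *       cpi->segmentation_map[map_index + cm->mb_cols] != seg ||
+  *       cpi->segmentation_map[map_index + cm->mb_cols + 1] != seg)
+  *     seg = 0;  // fall back to segment 0 on disagreement
+  *   xd->mode_info_context->mbmi.segment_id = seg;
+  *
+  * (and likewise for the cm->last_frame_seg_map branch). The '&&' form may
+  * be an intentional placeholder. */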
+ + /* Find best coding mode & reconstruct the MB so it is available + * as a predictor for MBs that follow in the SB */ + if (cm->frame_type == KEY_FRAME) + { + vp8_rd_pick_intra_mode_sb(cpi, x, + totalrate, + totaldist); + + /* Save the coding context */ + vpx_memcpy(&x->sb_context[0].mic, xd->mode_info_context, + sizeof(MODE_INFO)); + } + else + { + if (xd->segmentation_enabled && cpi->seg0_cnt > 0 && + !segfeature_active( xd, 0, SEG_LVL_REF_FRAME ) && + segfeature_active( xd, 1, SEG_LVL_REF_FRAME ) && + check_segref(xd, 1, INTRA_FRAME) + + check_segref(xd, 1, LAST_FRAME) + + check_segref(xd, 1, GOLDEN_FRAME) + + check_segref(xd, 1, ALTREF_FRAME) == 1) + { + cpi->seg0_progress = (cpi->seg0_idx << 16) / cpi->seg0_cnt; + } + else + { + cpi->seg0_progress = + (((mb_col & ~1) * 2 + (mb_row & ~1) * cm->mb_cols) << 16) / cm->MBs; + } + + vp8_rd_pick_inter_mode_sb(cpi, x, + recon_yoffset, + recon_uvoffset, + totalrate, + totaldist); + } + + /* Restore L & A coding context to those in place on entry */ + vpx_memcpy (cm->left_context, + left_context, + sizeof(left_context)); + vpx_memcpy (initial_above_context_ptr, + above_context, + sizeof(above_context)); +} +#endif + static void encode_sb(VP8_COMP *cpi, VP8_COMMON *cm, int mbrow, @@ -679,6 +852,7 @@ static void encode_sb(VP8_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, TOKENEXTRA **tp) { + VP8_COMMON *pc = cm; int i; int map_index; int mb_row, mb_col; @@ -733,22 +907,19 @@ static void encode_sb(VP8_COMP *cpi, // Restore MB state to that when it was picked #if CONFIG_SUPERBLOCKS - if (x->encode_as_sb) + if (xd->mode_info_context->mbmi.encoded_as_sb) { update_state(cpi, x, &x->sb_context[i]); - else + cpi->sb_count++; + } else #endif update_state(cpi, x, &x->mb_context[i]); - // Copy in the appropriate left context - vpx_memcpy(&cm->left_context, - &cpi->left_context[i >> 1], - sizeof(ENTROPY_CONTEXT_PLANES)); - map_index = (mb_row * cpi->common.mb_cols) + mb_col; x->mb_activity_ptr = &cpi->mb_activity_map[map_index]; // reset above block coeffs xd->above_context = cm->above_context + mb_col; + xd->left_context = cm->left_context + (i >> 1); // Set up distance of MB to edge of the frame in 1/8th pel units xd->mb_to_top_edge = -((mb_row * 16) << 3); @@ -756,24 +927,28 @@ static void encode_sb(VP8_COMP *cpi, xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3; xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3; - // Set up limit values for MV components to prevent them from - // extending beyond the UMV borders assuming 16x16 block size - x->mv_row_min = -((mb_row * 16) + VP8BORDERINPIXELS - INTERP_EXTEND); - x->mv_col_min = -((mb_col * 16) + VP8BORDERINPIXELS - INTERP_EXTEND); - x->mv_row_max = ((cm->mb_rows - mb_row) * 16 + - (VP8BORDERINPIXELS - 16 - INTERP_EXTEND)); - x->mv_col_max = ((cm->mb_cols - mb_col) * 16 + - (VP8BORDERINPIXELS - 16 - INTERP_EXTEND)); - #if CONFIG_SUPERBLOCKS - // Set up limit values for MV components to prevent them from - // extending beyond the UMV borders assuming 32x32 block size - x->mv_row_min_sb = -((mb_row * 16) + VP8BORDERINPIXELS - INTERP_EXTEND); - x->mv_col_min_sb = -((mb_col * 16) + VP8BORDERINPIXELS - INTERP_EXTEND); - x->mv_row_max_sb = ((cm->mb_rows - mb_row) * 16 + - (VP8BORDERINPIXELS - 32 - INTERP_EXTEND)); - x->mv_col_max_sb = ((cm->mb_cols - mb_col) * 16 + - (VP8BORDERINPIXELS - 32 - INTERP_EXTEND)); + if (xd->mode_info_context->mbmi.encoded_as_sb) { + // Set up limit values for MV components to prevent them from + // extending beyond the UMV borders assuming 32x32 block size + 
x->mv_row_min = -((mb_row * 16) + VP8BORDERINPIXELS - INTERP_EXTEND); + x->mv_col_min = -((mb_col * 16) + VP8BORDERINPIXELS - INTERP_EXTEND); + x->mv_row_max = ((cm->mb_rows - mb_row) * 16 + + (VP8BORDERINPIXELS - 32 - INTERP_EXTEND)); + x->mv_col_max = ((cm->mb_cols - mb_col) * 16 + + (VP8BORDERINPIXELS - 32 - INTERP_EXTEND)); + } else { +#endif + // Set up limit values for MV components to prevent them from + // extending beyond the UMV borders assuming 16x16 block size + x->mv_row_min = -((mb_row * 16) + VP8BORDERINPIXELS - INTERP_EXTEND); + x->mv_col_min = -((mb_col * 16) + VP8BORDERINPIXELS - INTERP_EXTEND); + x->mv_row_max = ((cm->mb_rows - mb_row) * 16 + + (VP8BORDERINPIXELS - 16 - INTERP_EXTEND)); + x->mv_col_max = ((cm->mb_cols - mb_col) * 16 + + (VP8BORDERINPIXELS - 16 - INTERP_EXTEND)); +#if CONFIG_SUPERBLOCKS + } #endif xd->up_available = (mb_row != 0); @@ -796,24 +971,21 @@ static void encode_sb(VP8_COMP *cpi, // Is segmentation enabled if (xd->segmentation_enabled) { - // Code to set segment id in xd->mbmi.segment_id - if (cpi->segmentation_map[map_index] <= 3) - mbmi->segment_id = cpi->segmentation_map[map_index]; - else - mbmi->segment_id = 0; - vp8cx_mb_init_quantizer(cpi, x); - } else - // Set to Segment 0 by default - mbmi->segment_id = 0; + } x->active_ptr = cpi->active_map + map_index; cpi->update_context = 0; if (cm->frame_type == KEY_FRAME) { - vp8cx_encode_intra_macro_block(cpi, x, tp, 1); - // Note the encoder may have changed the segment_id +#if CONFIG_SUPERBLOCKS + if (xd->mode_info_context->mbmi.encoded_as_sb) + vp8cx_encode_intra_super_block(cpi, x, tp, mb_col); + else +#endif + vp8cx_encode_intra_macro_block(cpi, x, tp, 1); + // Note the encoder may have changed the segment_id #ifdef MODE_STATS y_modes[mbmi->mode]++; @@ -822,9 +994,25 @@ static void encode_sb(VP8_COMP *cpi, unsigned char *segment_id; int seg_ref_active; - vp8cx_encode_inter_macroblock(cpi, x, tp, - recon_yoffset, recon_uvoffset, 1); - // Note the encoder may have changed the segment_id + if (xd->mode_info_context->mbmi.ref_frame) { + unsigned char pred_context; + + pred_context = get_pred_context(cm, xd, PRED_COMP); + + if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) + cpi->single_pred_count[pred_context]++; + else + cpi->comp_pred_count[pred_context]++; + } + +#if CONFIG_SUPERBLOCKS + if (xd->mode_info_context->mbmi.encoded_as_sb) + vp8cx_encode_inter_superblock(cpi, x, tp, recon_yoffset, recon_uvoffset, mb_col, mb_row); + else +#endif + vp8cx_encode_inter_macroblock(cpi, x, tp, + recon_yoffset, recon_uvoffset, 1); + // Note the encoder may have changed the segment_id #ifdef MODE_STATS inter_y_modes[mbmi->mode]++; @@ -864,10 +1052,20 @@ static void encode_sb(VP8_COMP *cpi, // TODO Partitioning is broken! 
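+ /* NOTE: in the MV range clamps set up earlier in encode_sb, the subtracted
+  * constant is the block footprint: a vector may reach the UMV border minus
+  * the block size, so the 32x32 branch uses (VP8BORDERINPIXELS - 32 -
+  * INTERP_EXTEND) where the 16x16 branch uses 16, and the min clamp is the
+  * same in both. In encode_sb_row below, the SB-vs-4xMB choice is a full RD
+  * comparison; a minimal sketch, assuming the RDCOST macro from
+  * vp8/encoder/rdopt.c:
+  *
+  *   mb_rate += vp8_cost_bit(cm->sb_coded, 0);  // cost of signalling "4 MBs"
+  *   sb_rate += vp8_cost_bit(cm->sb_coded, 1);  // cost of signalling "one SB"
+  *   if (RDCOST(x->rdmult, x->rddiv, sb_rate, sb_dist) <
+  *       RDCOST(x->rdmult, x->rddiv, mb_rate, mb_dist))
+  *     // mark all 4 MODE_INFOs with encoded_as_sb = 1
+  */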
cpi->tplist[mb_row].stop = *tp; - // Copy back updated left context - vpx_memcpy(&cpi->left_context[i >> 1], - &cm->left_context, - sizeof(ENTROPY_CONTEXT_PLANES)); +#if CONFIG_SUPERBLOCKS + if (xd->mode_info_context->mbmi.encoded_as_sb) { + x->src.y_buffer += 32; + x->src.u_buffer += 16; + x->src.v_buffer += 16; + + x->gf_active_ptr += 2; + x->partition_info += 2; + xd->mode_info_context += 2; + xd->prev_mode_info_context += 2; + + break; + } +#endif // Next MB mb_row += dy; @@ -911,14 +1109,13 @@ void encode_sb_row(VP8_COMP *cpi, int mb_cols = cm->mb_cols; // Initialize the left context for the new SB row - vpx_memset(cpi->left_context, 0, sizeof(cpi->left_context)); - vpx_memset(&cm->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memset(cm->left_context, 0, sizeof(cm->left_context)); // Code each SB in the row for (mb_col = 0; mb_col < mb_cols; mb_col += 2) { - int mb_rate = 0; + int mb_rate = 0, mb_dist = 0; #if CONFIG_SUPERBLOCKS - int sb_rate = INT_MAX; + int sb_rate = INT_MAX, sb_dist; #endif #if CONFIG_DEBUG @@ -930,8 +1127,14 @@ void encode_sb_row(VP8_COMP *cpi, unsigned char *vb = x->src.v_buffer; #endif +#if CONFIG_SUPERBLOCKS // Pick modes assuming the SB is coded as 4 independent MBs - pick_mb_modes(cpi, cm, mb_row, mb_col, x, xd, tp, &mb_rate); + xd->mode_info_context->mbmi.encoded_as_sb = 0; +#endif + pick_mb_modes(cpi, cm, mb_row, mb_col, x, xd, tp, &mb_rate, &mb_dist); +#if CONFIG_SUPERBLOCKS + mb_rate += vp8_cost_bit(cm->sb_coded, 0); +#endif x->src.y_buffer -= 32; x->src.u_buffer -= 16; @@ -952,21 +1155,40 @@ void encode_sb_row(VP8_COMP *cpi, #endif #if CONFIG_SUPERBLOCKS - // Pick a mode assuming that it applies all 4 of the MBs in the SB - pick_sb_modes(cpi, cm, mb_row, mb_col, x, xd, &sb_rate); + if (!((( mb_cols & 1) && mb_col == mb_cols - 1) || + ((cm->mb_rows & 1) && mb_row == cm->mb_rows - 1))) { + /* Pick a mode assuming that it applies to all 4 of the MBs in the SB */ + xd->mode_info_context->mbmi.encoded_as_sb = 1; + pick_sb_modes(cpi, cm, mb_row, mb_col, x, xd, tp, &sb_rate, &sb_dist); + sb_rate += vp8_cost_bit(cm->sb_coded, 1); + } - // Decide whether to encode as a SB or 4xMBs - if (sb_rate < mb_rate) { - x->encode_as_sb = 1; + /* Decide whether to encode as a SB or 4xMBs */ + if (sb_rate < INT_MAX && + RDCOST(x->rdmult, x->rddiv, sb_rate, sb_dist) < + RDCOST(x->rdmult, x->rddiv, mb_rate, mb_dist)) { + xd->mode_info_context->mbmi.encoded_as_sb = 1; + xd->mode_info_context[1].mbmi.encoded_as_sb = 1; + xd->mode_info_context[cm->mode_info_stride].mbmi.encoded_as_sb = 1; + xd->mode_info_context[1 + cm->mode_info_stride].mbmi.encoded_as_sb = 1; *totalrate += sb_rate; } else #endif { - x->encode_as_sb = 0; +#if CONFIG_SUPERBLOCKS + xd->mode_info_context->mbmi.encoded_as_sb = 0; + if (cm->mb_cols - 1 > mb_col) + xd->mode_info_context[1].mbmi.encoded_as_sb = 0; + if (cm->mb_rows - 1 > mb_row) { + xd->mode_info_context[cm->mode_info_stride].mbmi.encoded_as_sb = 0; + if (cm->mb_cols - 1 > mb_col) + xd->mode_info_context[1 + cm->mode_info_stride].mbmi.encoded_as_sb = 0; + } +#endif *totalrate += mb_rate; } - // Encode SB using best computed mode(s) + /* Encode SB using best computed mode(s) */ encode_sb(cpi, cm, mb_row, mb_col, x, xd, tp); #if CONFIG_DEBUG @@ -1038,8 +1260,6 @@ void init_encode_frame_mb_context(VP8_COMP *cpi) { xd->mode_info_context->mbmi.mode = DC_PRED; xd->mode_info_context->mbmi.uv_mode = DC_PRED; - xd->left_context = &cm->left_context; - vp8_zero(cpi->count_mb_ref_frame_usage) vp8_zero(cpi->bmode_count) vp8_zero(cpi->ymode_count) @@ 
-1049,6 +1269,10 @@ void init_encode_frame_mb_context(VP8_COMP *cpi) { vp8_zero(cpi->mbsplit_count) vp8_zero(cpi->common.fc.mv_ref_ct) vp8_zero(cpi->common.fc.mv_ref_ct_a) +#if CONFIG_SUPERBLOCKS + vp8_zero(cpi->sb_ymode_count) + cpi->sb_count = 0; +#endif // vp8_zero(cpi->uv_mode_count) x->mvc = cm->fc.mvc; @@ -1380,7 +1604,12 @@ static void sum_intra_stats(VP8_COMP *cpi, MACROBLOCK *x) { } #endif - ++cpi->ymode_count[m]; +#if CONFIG_SUPERBLOCKS + if (xd->mode_info_context->mbmi.encoded_as_sb) { + ++cpi->sb_ymode_count[m]; + } else +#endif + ++cpi->ymode_count[m]; if (m != I8X8_PRED) ++cpi->y_uv_mode_count[m][uvm]; else { @@ -1418,6 +1647,160 @@ static void adjust_act_zbin(VP8_COMP *cpi, MACROBLOCK *x) { #endif } +#if CONFIG_SUPERBLOCKS +static void update_sb_skip_coeff_state(VP8_COMP *cpi, + MACROBLOCK *x, + ENTROPY_CONTEXT_PLANES ta[4], + ENTROPY_CONTEXT_PLANES tl[4], + TOKENEXTRA *t[4], + TOKENEXTRA **tp, + int skip[4]) +{ + TOKENEXTRA tokens[4][16 * 24]; + int n_tokens[4], n; + + // if there were no skips, we don't need to do anything + if (!skip[0] && !skip[1] && !skip[2] && !skip[3]) + return; + + // if we don't do coeff skipping for this frame, we don't + // need to do anything here + if (!cpi->common.mb_no_coeff_skip) + return; + + // if all 4 MBs skipped coeff coding, nothing to be done + if (skip[0] && skip[1] && skip[2] && skip[3]) + return; + + // so the situation now is that we want to skip coeffs + // for some MBs, but not all, and we didn't code EOB + // coefficients for them. However, the skip flag for this + // SB will be 0 overall, so we need to insert EOBs in the + // middle of the token tree. Do so here. + n_tokens[0] = t[1] - t[0]; + n_tokens[1] = t[2] - t[1]; + n_tokens[2] = t[3] - t[2]; + n_tokens[3] = *tp - t[3]; + if (n_tokens[0]) + memcpy(tokens[0], t[0], n_tokens[0] * sizeof(*t[0])); + if (n_tokens[1]) + memcpy(tokens[1], t[1], n_tokens[1] * sizeof(*t[0])); + if (n_tokens[2]) + memcpy(tokens[2], t[2], n_tokens[2] * sizeof(*t[0])); + if (n_tokens[3]) + memcpy(tokens[3], t[3], n_tokens[3] * sizeof(*t[0])); + + // reset pointer, stuff EOBs where necessary + *tp = t[0]; + for (n = 0; n < 4; n++) { + TOKENEXTRA *tbak = *tp; + if (skip[n]) { + x->e_mbd.above_context = &ta[n]; + x->e_mbd.left_context = &tl[n]; + vp8_stuff_mb_8x8(cpi, &x->e_mbd, tp, 0); + } else { + if (n_tokens[n]) { + memcpy(*tp, tokens[n], sizeof(*t[0]) * n_tokens[n]); + } + (*tp) += n_tokens[n]; + } + } +} + +void vp8cx_encode_intra_super_block(VP8_COMP *cpi, + MACROBLOCK *x, + TOKENEXTRA **t, + int mb_col) { + const int output_enabled = 1; + int n; + MACROBLOCKD *xd = &x->e_mbd; + VP8_COMMON *cm = &cpi->common; + const uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer; + const uint8_t *usrc = x->src.u_buffer, *udst = xd->dst.u_buffer; + const uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer; + int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride; + int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride; + const VP8_ENCODER_RTCD *rtcd = IF_RTCD(&cpi->rtcd); + TOKENEXTRA *tp[4]; + int skip[4]; + MODE_INFO *mi = x->e_mbd.mode_info_context; + ENTROPY_CONTEXT_PLANES ta[4], tl[4]; + + if ((cpi->oxcf.tuning == VP8_TUNE_SSIM) && output_enabled) { + adjust_act_zbin(cpi, x); + vp8_update_zbin_extra(cpi, x); + } + + /* test code: set transform size based on mode selection */ + if (cpi->common.txfm_mode == ALLOW_8X8) { + x->e_mbd.mode_info_context->mbmi.txfm_size = TX_8X8; + x->e_mbd.mode_info_context[1].mbmi.txfm_size = TX_8X8; + 
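+ /* The two assignments below repeat this for the lower MB row of the SB,
+  * one mode_info_stride away; all four MODE_INFOs should agree on txfm_size
+  * since the per-quadrant tokenization reads each one individually. */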
x->e_mbd.mode_info_context[cm->mode_info_stride].mbmi.txfm_size = TX_8X8; + x->e_mbd.mode_info_context[cm->mode_info_stride+1].mbmi.txfm_size = TX_8X8; + cpi->t8x8_count++; + } else { + x->e_mbd.mode_info_context->mbmi.txfm_size = TX_4X4; + cpi->t4x4_count++; + } + + RECON_INVOKE(&rtcd->common->recon, build_intra_predictors_sby_s)(&x->e_mbd); + RECON_INVOKE(&rtcd->common->recon, build_intra_predictors_sbuv_s)(&x->e_mbd); + + assert(x->e_mbd.mode_info_context->mbmi.txfm_size == TX_8X8); + for (n = 0; n < 4; n++) + { + int x_idx = n & 1, y_idx = n >> 1; + + xd->above_context = cm->above_context + mb_col + (n & 1); + xd->left_context = cm->left_context + (n >> 1); + + vp8_subtract_mby_s_c(x->src_diff, + src + x_idx * 16 + y_idx * 16 * src_y_stride, + src_y_stride, + dst + x_idx * 16 + y_idx * 16 * dst_y_stride, + dst_y_stride); + vp8_subtract_mbuv_s_c(x->src_diff, + usrc + x_idx * 8 + y_idx * 8 * src_uv_stride, + vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride, + src_uv_stride, + udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, + vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride, + dst_uv_stride); + vp8_transform_intra_mby_8x8(x); + vp8_transform_mbuv_8x8(x); + vp8_quantize_mby_8x8(x); + vp8_quantize_mbuv_8x8(x); + if (x->optimize) { + vp8_optimize_mby_8x8(x, rtcd); + vp8_optimize_mbuv_8x8(x, rtcd); + } + vp8_inverse_transform_mby_8x8(IF_RTCD(&rtcd->common->idct), &x->e_mbd); + vp8_inverse_transform_mbuv_8x8(IF_RTCD(&rtcd->common->idct), &x->e_mbd); + vp8_recon_mby_s_c(IF_RTCD(&rtcd->common->recon), &x->e_mbd, + dst + x_idx * 16 + y_idx * 16 * dst_y_stride); + vp8_recon_mbuv_s_c(IF_RTCD(&rtcd->common->recon), &x->e_mbd, + udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, + vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride); + + if (output_enabled) { + memcpy(&ta[n], xd->above_context, sizeof(ta[n])); + memcpy(&tl[n], xd->left_context, sizeof(tl[n])); + tp[n] = *t; + xd->mode_info_context = mi + x_idx + y_idx * cm->mode_info_stride; + vp8_tokenize_mb(cpi, &x->e_mbd, t, 0); + skip[n] = xd->mode_info_context->mbmi.mb_skip_coeff; + } + } + + if (output_enabled) { + // Tokenize + xd->mode_info_context = mi; + sum_intra_stats(cpi, x); + update_sb_skip_coeff_state(cpi, x, ta, tl, tp, t, skip); + } +} +#endif + void vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, @@ -1484,6 +1867,9 @@ void vp8cx_encode_inter_macroblock (VP8_COMP *cpi, MACROBLOCK *x, unsigned char ref_pred_flag; x->skip = 0; +#if CONFIG_SUPERBLOCKS + assert(!xd->mode_info_context->mbmi.encoded_as_sb); +#endif #if CONFIG_SWITCHABLE_INTERP vp8_setup_interp_filters(xd, mbmi->interp_filter, cm); @@ -1648,3 +2034,190 @@ void vp8cx_encode_inter_macroblock (VP8_COMP *cpi, MACROBLOCK *x, } } } + +#if CONFIG_SUPERBLOCKS +void vp8cx_encode_inter_superblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, + int recon_yoffset, int recon_uvoffset, int mb_col, int mb_row) { + const int output_enabled = 1; + VP8_COMMON *cm = &cpi->common; + MACROBLOCKD *xd = &x->e_mbd; + const uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer; + const uint8_t *usrc = x->src.u_buffer, *udst = xd->dst.u_buffer; + const uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer; + int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride; + int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride; + const VP8_ENCODER_RTCD *rtcd = IF_RTCD(&cpi->rtcd); + int mis = xd->mode_info_stride; + unsigned int segment_id = xd->mode_info_context->mbmi.segment_id; + int seg_ref_active; + unsigned char ref_pred_flag; + int n; + TOKENEXTRA *tp[4]; + 
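+ /* NOTE: both SB encode functions walk the four 16x16 quadrants as
+  *   n = 0..3, x_idx = n & 1, y_idx = n >> 1
+  * with pixel offsets 16 * x_idx / 16 * y_idx for luma and 8 * x_idx /
+  * 8 * y_idx for chroma. On that convention the intra path above repoints
+  * the token contexts as above_context + mb_col + (n & 1) and
+  * left_context + (n >> 1); the tokenization branches below use (n >> 1)
+  * for both, which looks like it should be (n & 1) for the above context
+  * to match the intra path. */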
int skip[4]; + MODE_INFO *mi = x->e_mbd.mode_info_context; + ENTROPY_CONTEXT_PLANES ta[4], tl[4]; + + x->skip = 0; + + if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { + // Adjust the zbin based on this MB rate. + adjust_act_zbin(cpi, x); + } + + { + // Experimental code. Special case for gf and arf zeromv modes. + // Increase zbin size to suppress noise + cpi->zbin_mode_boost = 0; + if (cpi->zbin_mode_boost_enabled) { + if (xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME) { + if (xd->mode_info_context->mbmi.mode == ZEROMV) { + if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) + cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; + else + cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST; + } else if (xd->mode_info_context->mbmi.mode == SPLITMV) + cpi->zbin_mode_boost = 0; + else + cpi->zbin_mode_boost = MV_ZBIN_BOOST; + } + } + + vp8_update_zbin_extra(cpi, x); + } + + seg_ref_active = segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME); + + // SET VARIOUS PREDICTION FLAGS + + // Did the chosen reference frame match its predicted value. + ref_pred_flag = ((xd->mode_info_context->mbmi.ref_frame == + get_pred_ref(cm, xd))); + set_pred_flag(xd, PRED_REF, ref_pred_flag); + + /* test code: set transform size based on mode selection */ + if (cpi->common.txfm_mode == ALLOW_8X8 + && x->e_mbd.mode_info_context->mbmi.mode != I8X8_PRED + && x->e_mbd.mode_info_context->mbmi.mode != B_PRED + && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) { + x->e_mbd.mode_info_context->mbmi.txfm_size = TX_8X8; + cpi->t8x8_count++; + } else { + x->e_mbd.mode_info_context->mbmi.txfm_size = TX_4X4; + cpi->t4x4_count++; + } + + if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { + RECON_INVOKE(&rtcd->common->recon, build_intra_predictors_sby_s)(&x->e_mbd); + RECON_INVOKE(&rtcd->common->recon, build_intra_predictors_sbuv_s)(&x->e_mbd); + } else { + int ref_fb_idx; + + if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME) + ref_fb_idx = cpi->common.lst_fb_idx; + else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME) + ref_fb_idx = cpi->common.gld_fb_idx; + else + ref_fb_idx = cpi->common.alt_fb_idx; + + xd->pre.y_buffer = cpi->common.yv12_fb[ref_fb_idx].y_buffer + recon_yoffset; + xd->pre.u_buffer = cpi->common.yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset; + xd->pre.v_buffer = cpi->common.yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset; + + if (xd->mode_info_context->mbmi.second_ref_frame) { + int second_ref_fb_idx; + + if (xd->mode_info_context->mbmi.second_ref_frame == LAST_FRAME) + second_ref_fb_idx = cpi->common.lst_fb_idx; + else if (xd->mode_info_context->mbmi.second_ref_frame == GOLDEN_FRAME) + second_ref_fb_idx = cpi->common.gld_fb_idx; + else + second_ref_fb_idx = cpi->common.alt_fb_idx; + + xd->second_pre.y_buffer = cpi->common.yv12_fb[second_ref_fb_idx].y_buffer + + recon_yoffset; + xd->second_pre.u_buffer = cpi->common.yv12_fb[second_ref_fb_idx].u_buffer + + recon_uvoffset; + xd->second_pre.v_buffer = cpi->common.yv12_fb[second_ref_fb_idx].v_buffer + + recon_uvoffset; + } + + vp8_build_inter32x32_predictors_sb(xd, xd->dst.y_buffer, + xd->dst.u_buffer, xd->dst.v_buffer, + xd->dst.y_stride, xd->dst.uv_stride); + } + + assert(x->e_mbd.mode_info_context->mbmi.txfm_size == TX_8X8); + for (n = 0; n < 4; n++) + { + int x_idx = n & 1, y_idx = n >> 1; + + vp8_subtract_mby_s_c(x->src_diff, + src + x_idx * 16 + y_idx * 16 * src_y_stride, + src_y_stride, + dst + x_idx * 16 + y_idx * 16 * dst_y_stride, + dst_y_stride); + vp8_subtract_mbuv_s_c(x->src_diff, + usrc + x_idx * 8 + y_idx * 8 * src_uv_stride, + vsrc 
+ x_idx * 8 + y_idx * 8 * src_uv_stride, + src_uv_stride, + udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, + vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride, + dst_uv_stride); + if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { + vp8_transform_intra_mby_8x8(x); + } else { + vp8_transform_mby_8x8(x); + } + vp8_transform_mbuv_8x8(x); + vp8_quantize_mby_8x8(x); + vp8_quantize_mbuv_8x8(x); + if (x->optimize) { + vp8_optimize_mby_8x8(x, rtcd); + vp8_optimize_mbuv_8x8(x, rtcd); + } + vp8_inverse_transform_mby_8x8(IF_RTCD(&rtcd->common->idct), &x->e_mbd); + vp8_inverse_transform_mbuv_8x8(IF_RTCD(&rtcd->common->idct), &x->e_mbd); + vp8_recon_mby_s_c(IF_RTCD(&rtcd->common->recon), &x->e_mbd, + dst + x_idx * 16 + y_idx * 16 * dst_y_stride); + vp8_recon_mbuv_s_c(IF_RTCD(&rtcd->common->recon), &x->e_mbd, + udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, + vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride); + + if (!x->skip) { + if (output_enabled) { + xd->left_context = cm->left_context + (n >> 1); + xd->above_context = cm->above_context + mb_col + (n >> 1); + memcpy(&ta[n], xd->above_context, sizeof(ta[n])); + memcpy(&tl[n], xd->left_context, sizeof(tl[n])); + tp[n] = *t; + xd->mode_info_context = mi + x_idx + y_idx * cm->mode_info_stride; + vp8_tokenize_mb(cpi, &x->e_mbd, t, 0); + skip[n] = xd->mode_info_context->mbmi.mb_skip_coeff; + } + } else { + int mb_skip_context = + cpi->common.mb_no_coeff_skip ? + (x->e_mbd.mode_info_context - 1)->mbmi.mb_skip_coeff + + (x->e_mbd.mode_info_context - cpi->common.mode_info_stride)->mbmi.mb_skip_coeff : + 0; + if (cpi->common.mb_no_coeff_skip) { + skip[n] = xd->mode_info_context->mbmi.mb_skip_coeff = 1; + xd->left_context = cm->left_context + (n >> 1); + xd->above_context = cm->above_context + mb_col + (n >> 1); + memcpy(&ta[n], xd->above_context, sizeof(ta[n])); + memcpy(&tl[n], xd->left_context, sizeof(tl[n])); + tp[n] = *t; + cpi->skip_true_count[mb_skip_context]++; + vp8_fix_contexts(xd); + } else { + vp8_stuff_mb(cpi, xd, t, 0); + xd->mode_info_context->mbmi.mb_skip_coeff = 0; + cpi->skip_false_count[mb_skip_context]++; + } + } + } + + xd->mode_info_context = mi; + update_sb_skip_coeff_state(cpi, x, ta, tl, tp, t, skip); +} +#endif diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c index e03b47e2ced1c2b42fef23f32146c0be3d91d020..473f8ba3d531de14576dabb22e299c603fc41224 100644 --- a/vp8/encoder/encodemb.c +++ b/vp8/encoder/encodemb.c @@ -67,11 +67,10 @@ void vp8_subtract_4b_c(BLOCK *be, BLOCKD *bd, int pitch) { } } -void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride) { +void vp8_subtract_mbuv_s_c(short *diff, unsigned char *usrc, unsigned char *vsrc, int src_stride, + unsigned char *upred, unsigned char *vpred, int dst_stride) { short *udiff = diff + 256; short *vdiff = diff + 320; - unsigned char *upred = pred + 256; - unsigned char *vpred = pred + 320; int r, c; @@ -81,8 +80,8 @@ void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc, } udiff += 8; - upred += 8; - usrc += stride; + upred += dst_stride; + usrc += src_stride; } for (r = 0; r < 8; r++) { @@ -91,12 +90,19 @@ void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc, } vdiff += 8; - vpred += 8; - vsrc += stride; + vpred += dst_stride; + vsrc += src_stride; } } -void vp8_subtract_mby_c(short *diff, unsigned char *src, unsigned char *pred, int stride) { +void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride) { + unsigned char 
*upred = pred + 256; + unsigned char *vpred = pred + 320; + + vp8_subtract_mbuv_s_c(diff, usrc, vsrc, stride, upred, vpred, 8); +} + +void vp8_subtract_mby_s_c(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int dst_stride) { int r, c; for (r = 0; r < 16; r++) { @@ -105,11 +111,16 @@ void vp8_subtract_mby_c(short *diff, unsigned char *src, unsigned char *pred, in } diff += 16; - pred += 16; - src += stride; + pred += dst_stride; + src += src_stride; } } +void vp8_subtract_mby_c(short *diff, unsigned char *src, unsigned char *pred, int stride) +{ + vp8_subtract_mby_s_c(diff, src, stride, pred, 16); +} + static void vp8_subtract_mb(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) { BLOCK *b = &x->block[0]; diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c index 6390f3fe4fa6446e195e61e356975c37971d9fbd..6a5bf59d50d44ec99df1c0ad271a864dbfaf7429 100644 --- a/vp8/encoder/generic/csystemdependent.c +++ b/vp8/encoder/generic/csystemdependent.c @@ -23,24 +23,36 @@ extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER void vp8_cmachine_specific_config(VP8_COMP *cpi) { #if CONFIG_RUNTIME_CPU_DETECT cpi->rtcd.common = &cpi->common.rtcd; +#if CONFIG_SUPERBLOCKS + cpi->rtcd.variance.sad32x32 = vp8_sad32x32_c; +#endif cpi->rtcd.variance.sad16x16 = vp8_sad16x16_c; cpi->rtcd.variance.sad16x8 = vp8_sad16x8_c; cpi->rtcd.variance.sad8x16 = vp8_sad8x16_c; cpi->rtcd.variance.sad8x8 = vp8_sad8x8_c; cpi->rtcd.variance.sad4x4 = vp8_sad4x4_c; +#if CONFIG_SUPERBLOCKS + cpi->rtcd.variance.sad32x32x3 = vp8_sad32x32x3_c; +#endif cpi->rtcd.variance.sad16x16x3 = vp8_sad16x16x3_c; cpi->rtcd.variance.sad16x8x3 = vp8_sad16x8x3_c; cpi->rtcd.variance.sad8x16x3 = vp8_sad8x16x3_c; cpi->rtcd.variance.sad8x8x3 = vp8_sad8x8x3_c; cpi->rtcd.variance.sad4x4x3 = vp8_sad4x4x3_c; +#if CONFIG_SUPERBLOCKS + cpi->rtcd.variance.sad32x32x8 = vp8_sad32x32x8_c; +#endif cpi->rtcd.variance.sad16x16x8 = vp8_sad16x16x8_c; cpi->rtcd.variance.sad16x8x8 = vp8_sad16x8x8_c; cpi->rtcd.variance.sad8x16x8 = vp8_sad8x16x8_c; cpi->rtcd.variance.sad8x8x8 = vp8_sad8x8x8_c; cpi->rtcd.variance.sad4x4x8 = vp8_sad4x4x8_c; +#if CONFIG_SUPERBLOCKS + cpi->rtcd.variance.sad32x32x4d = vp8_sad32x32x4d_c; +#endif cpi->rtcd.variance.sad16x16x4d = vp8_sad16x16x4d_c; cpi->rtcd.variance.sad16x8x4d = vp8_sad16x8x4d_c; cpi->rtcd.variance.sad8x16x4d = vp8_sad8x16x4d_c; @@ -54,16 +66,34 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi) { cpi->rtcd.variance.var8x16 = vp8_variance8x16_c; cpi->rtcd.variance.var16x8 = vp8_variance16x8_c; cpi->rtcd.variance.var16x16 = vp8_variance16x16_c; +#if CONFIG_SUPERBLOCKS + cpi->rtcd.variance.var32x32 = vp8_variance32x32_c; +#endif cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c; cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_c; cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c; cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c; cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_c; +#if CONFIG_SUPERBLOCKS + cpi->rtcd.variance.subpixvar32x32 = vp8_sub_pixel_variance32x32_c; +#endif cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_c; +#if CONFIG_SUPERBLOCKS + cpi->rtcd.variance.halfpixvar32x32_h = vp8_variance_halfpixvar32x32_h_c; +#endif cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_c; +#if CONFIG_SUPERBLOCKS + cpi->rtcd.variance.halfpixvar32x32_v = vp8_variance_halfpixvar32x32_v_c; +#endif cpi->rtcd.variance.halfpixvar16x16_hv = 
vp8_variance_halfpixvar16x16_hv_c; +#if CONFIG_SUPERBLOCKS + cpi->rtcd.variance.halfpixvar32x32_hv = vp8_variance_halfpixvar32x32_hv_c; +#endif cpi->rtcd.variance.subpixmse16x16 = vp8_sub_pixel_mse16x16_c; +#if CONFIG_SUPERBLOCKS + cpi->rtcd.variance.subpixmse32x32 = vp8_sub_pixel_mse32x32_c; +#endif cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c; cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c; diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c index ba4cd897dbc25f27bde5420f0a237b1709886128..a0621b649b3e426d95dffcbb623ae26cb830175e 100644 --- a/vp8/encoder/mcomp.c +++ b/vp8/encoder/mcomp.c @@ -243,7 +243,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int y_stride; int offset; -#if ARCH_X86 || ARCH_X86_64 +#if !CONFIG_SUPERBLOCKS && (ARCH_X86 || ARCH_X86_64) unsigned char *y0 = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col; unsigned char *y; int buf_r1, buf_r2, buf_c1, buf_c2; diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index bcbc857666d293ce0b88867237c04df688011fb0..deff0db08522e4ffd0304c9d3a07436a063e25f2 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -620,6 +620,42 @@ static void print_seg_map(VP8_COMP *cpi) { fclose(statsfile); } +static void update_reference_segmentation_map(VP8_COMP *cpi) { + VP8_COMMON *cm = &cpi->common; + int row, col, sb_rows = (cm->mb_rows + 1) >> 1, sb_cols = (cm->mb_cols + 1) >> 1; + MODE_INFO *mi = cm->mi; + uint8_t *segmap = cpi->segmentation_map; + uint8_t *segcache = cm->last_frame_seg_map; + + for (row = 0; row < sb_rows; row++) { + for (col = 0; col < sb_cols; col++) { + MODE_INFO *miptr = mi + col * 2; + uint8_t *seg = segmap + col * 2; + uint8_t *cache = segcache + col * 2; +#if CONFIG_SUPERBLOCKS + if (miptr->mbmi.encoded_as_sb) { + cache[0] = cache[1] = cache[cm->mb_cols] = cache[cm->mb_cols + 1] = + miptr->mbmi.segment_id; + } else +#endif + { + cache[0] = miptr[0].mbmi.segment_id; + if (!(cm->mb_cols & 1) || col < sb_cols - 1) + cache[1] = miptr[1].mbmi.segment_id; + if (!(cm->mb_rows & 1) || row < sb_rows - 1) { + cache[cm->mb_cols] = miptr[cm->mode_info_stride].mbmi.segment_id; + if (!(cm->mb_cols & 1) || col < sb_cols - 1) + cache[1] = miptr[1].mbmi.segment_id; + cache[cm->mb_cols + 1] = miptr[cm->mode_info_stride + 1].mbmi.segment_id; + } + } + } + segmap += 2 * cm->mb_cols; + segcache += 2 * cm->mb_cols; + mi += 2 * cm->mode_info_stride; + } +} + static void set_default_lf_deltas(VP8_COMP *cpi) { cpi->mb.e_mbd.mode_ref_lf_delta_enabled = 1; cpi->mb.e_mbd.mode_ref_lf_delta_update = 1; @@ -1736,6 +1772,9 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) { cm->prob_last_coded = 128; cm->prob_gf_coded = 128; cm->prob_intra_coded = 63; +#if CONFIG_SUPERBLOCKS + cm->sb_coded = 200; +#endif for (i = 0; i < COMP_PRED_CONTEXTS; i++) cm->prob_comppred[i] = 128; @@ -1919,6 +1958,18 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) { init_mv_ref_counts(); #endif +#if CONFIG_SUPERBLOCKS + cpi->fn_ptr[BLOCK_32X32].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad32x32); + cpi->fn_ptr[BLOCK_32X32].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var32x32); + cpi->fn_ptr[BLOCK_32X32].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar32x32); + cpi->fn_ptr[BLOCK_32X32].svf_halfpix_h = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar32x32_h); + cpi->fn_ptr[BLOCK_32X32].svf_halfpix_v = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar32x32_v); + cpi->fn_ptr[BLOCK_32X32].svf_halfpix_hv = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar32x32_hv); + 
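+ /* NOTE: in update_reference_segmentation_map above, the guarded inner
+  * 'cache[1] = miptr[1].mbmi.segment_id;' repeats an assignment already
+  * made before the row guard, while 'cache[cm->mb_cols + 1]' is written
+  * without any column guard; the guard was presumably meant for the
+  * bottom-right store, i.e.:
+  *
+  *   cache[cm->mb_cols] = miptr[cm->mode_info_stride].mbmi.segment_id;
+  *   if (!(cm->mb_cols & 1) || col < sb_cols - 1)
+  *     cache[cm->mb_cols + 1] =
+  *         miptr[cm->mode_info_stride + 1].mbmi.segment_id;
+  *
+  * As written, odd-width frames write one entry past the row. */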
cpi->fn_ptr[BLOCK_32X32].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad32x32x3); + cpi->fn_ptr[BLOCK_32X32].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad32x32x8); + cpi->fn_ptr[BLOCK_32X32].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad32x32x4d); +#endif + cpi->fn_ptr[BLOCK_16X16].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16); cpi->fn_ptr[BLOCK_16X16].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16); cpi->fn_ptr[BLOCK_16X16].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar16x16); @@ -3618,6 +3669,10 @@ static void encode_frame_to_data_rate cpi->dummy_packing = 0; vp8_pack_bitstream(cpi, dest, size); + if (cpi->mb.e_mbd.update_mb_segmentation_map) { + update_reference_segmentation_map(cpi); + } + #if CONFIG_PRED_FILTER // Select the prediction filtering mode to use for the // next frame based on the current frame selections diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h index ff3a2110719179dbc3a793f15a8cb63bfed9f26e..7fb7dd2ff4b5c13c920e9d4fad324d79e0d34266 100644 --- a/vp8/encoder/onyx_int.h +++ b/vp8/encoder/onyx_int.h @@ -359,7 +359,9 @@ enum { BLOCK_8X8, BLOCK_4X4, BLOCK_16X16, - BLOCK_MAX_SEGMENTS + BLOCK_MAX_SEGMENTS, + BLOCK_32X32 = BLOCK_MAX_SEGMENTS, + BLOCK_MAX_SB_SEGMENTS, }; typedef struct VP8_COMP { @@ -528,6 +530,10 @@ typedef struct VP8_COMP { int cq_target_quality; +#if CONFIG_SUPERBLOCKS + int sb_count; + int sb_ymode_count [VP8_I32X32_MODES]; +#endif int ymode_count [VP8_YMODES]; /* intra MB type cts this frame */ int bmode_count [VP8_BINTRAMODES]; int i8x8_mode_count [VP8_I8X8_MODES]; @@ -628,7 +634,7 @@ typedef struct VP8_COMP { vp8_full_search_fn_t full_search_sad; vp8_refining_search_fn_t refining_search_sad; vp8_diamond_search_fn_t diamond_search_sad; - vp8_variance_fn_ptr_t fn_ptr[BLOCK_MAX_SEGMENTS]; + vp8_variance_fn_ptr_t fn_ptr[BLOCK_MAX_SB_SEGMENTS]; uint64_t time_receive_data; uint64_t time_compress_data; uint64_t time_pick_lpf; @@ -732,9 +738,6 @@ typedef struct VP8_COMP { int droppable; - // Global store for SB left contexts, one for each MB row in the SB - ENTROPY_CONTEXT_PLANES left_context[2]; - // TODO Do we still need this?? 
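+ /* NOTE: BLOCK_32X32 is deliberately aliased to BLOCK_MAX_SEGMENTS in the
+  * enum above, so every existing table sized [BLOCK_MAX_SEGMENTS] keeps its
+  * meaning and only fn_ptr[] grows to BLOCK_MAX_SB_SEGMENTS to gain the
+  * 32x32 entry used by the SB motion search. The left_context[2] store
+  * removed above is superseded by the cm->left_context array that
+  * encodeframe.c now points xd->left_context into. */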
int update_context; diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c index 720736f33555ef44f3e0c6d0cbac88cef9461202..92a80ecbaeeec6e77f7d7fb9ed2a10ef61371af3 100644 --- a/vp8/encoder/rdopt.c +++ b/vp8/encoder/rdopt.c @@ -718,7 +718,7 @@ static void macro_block_yrd(MACROBLOCK *mb, *Rate = vp8_rdcost_mby(mb); } -static int vp8_rdcost_mby_8x8(MACROBLOCK *mb) { +static int vp8_rdcost_mby_8x8(MACROBLOCK *mb, int backup) { int cost = 0; int b; MACROBLOCKD *xd = &mb->e_mbd; @@ -726,11 +726,16 @@ static int vp8_rdcost_mby_8x8(MACROBLOCK *mb) { ENTROPY_CONTEXT *ta; ENTROPY_CONTEXT *tl; - vpx_memcpy(&t_above,xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + if (backup) { + vpx_memcpy(&t_above,xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES)); - ta = (ENTROPY_CONTEXT *)&t_above; - tl = (ENTROPY_CONTEXT *)&t_left; + ta = (ENTROPY_CONTEXT *)&t_above; + tl = (ENTROPY_CONTEXT *)&t_left; + } else { + ta = (ENTROPY_CONTEXT *)mb->e_mbd.above_context; + tl = (ENTROPY_CONTEXT *)mb->e_mbd.left_context; + } for (b = 0; b < 16; b += 4) cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_Y_NO_DC, @@ -775,7 +780,7 @@ static void macro_block_yrd_8x8(MACROBLOCK *mb, *Distortion = (d >> 2); // rate - *Rate = vp8_rdcost_mby_8x8(mb); + *Rate = vp8_rdcost_mby_8x8(mb, 1); } #if CONFIG_TX16X16 @@ -823,6 +828,66 @@ static void copy_predictor(unsigned char *dst, const unsigned char *predictor) { d[12] = p[12]; } +#if CONFIG_SUPERBLOCKS +static void super_block_yrd_8x8(MACROBLOCK *x, + int *rate, + int *distortion, + const VP8_ENCODER_RTCD *rtcd, int *skip) +{ + MACROBLOCKD *const xd = &x->e_mbd; + BLOCK *const by2 = x->block + 24; + BLOCKD *const bdy2 = xd->block + 24; + int d = 0, r = 0, n; + const uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer; + int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride; + ENTROPY_CONTEXT_PLANES *ta = xd->above_context; + ENTROPY_CONTEXT_PLANES *tl = xd->left_context; + ENTROPY_CONTEXT_PLANES t_above[2]; + ENTROPY_CONTEXT_PLANES t_left[2]; + int skippable = 1; + + vpx_memcpy(t_above, xd->above_context, sizeof(t_above)); + vpx_memcpy(t_left, xd->left_context, sizeof(t_left)); + + for (n = 0; n < 4; n++) { + int x_idx = n & 1, y_idx = n >> 1; + + vp8_subtract_mby_s_c(x->src_diff, + src + x_idx * 16 + y_idx * 16 * src_y_stride, + src_y_stride, + dst + x_idx * 16 + y_idx * 16 * dst_y_stride, + dst_y_stride); + vp8_transform_mby_8x8(x); + vp8_quantize_mby_8x8(x); + + /* remove 1st order dc to properly combine 1st/2nd order distortion */ + x->coeff[ 0] = 0; + x->coeff[ 64] = 0; + x->coeff[128] = 0; + x->coeff[192] = 0; + xd->dqcoeff[ 0] = 0; + xd->dqcoeff[ 64] = 0; + xd->dqcoeff[128] = 0; + xd->dqcoeff[192] = 0; + + d += ENCODEMB_INVOKE(&rtcd->encodemb, mberr)(x, 0); + d += ENCODEMB_INVOKE(&rtcd->encodemb, berr)(by2->coeff, bdy2->dqcoeff, 16); + xd->above_context = ta + x_idx; + xd->left_context = tl + y_idx; + r += vp8_rdcost_mby_8x8(x, 0); + skippable = skippable && mby_is_skippable_8x8(xd); + } + + *distortion = (d >> 2); + *rate = r; + if (skip) *skip = skippable; + xd->above_context = ta; + xd->left_context = tl; + vpx_memcpy(xd->above_context, &t_above, sizeof(t_above)); + vpx_memcpy(xd->left_context, &t_left, sizeof(t_left)); +} +#endif + static void copy_predictor_8x8(unsigned char *dst, const unsigned char *predictor) { const unsigned int *p = (const unsigned int *)predictor; unsigned int *d = (unsigned int *)dst; 
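+ /* NOTE: the new 'backup' flag on vp8_rdcost_mby_8x8() above (and on
+  * rd_cost_mbuv_8x8() further down) selects between costing against stack
+  * copies of the entropy contexts (backup = 1, the previous behaviour) and
+  * costing against the live xd->above_context / xd->left_context pointers
+  * (backup = 0). The SB helpers use backup = 0 because they repoint the
+  * live contexts per 16x16 quadrant before costing:
+  *
+  *   xd->above_context = ta + x_idx;  // ta/tl: saved base pointers
+  *   xd->left_context = tl + y_idx;
+  *   r += vp8_rdcost_mby_8x8(x, 0);   // reads/updates that quadrant only
+  *
+  * and restore both the pointers and the saved contents afterwards. */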
@@ -1062,6 +1127,45 @@ static int64_t rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rat return RDCOST(mb->rdmult, mb->rddiv, cost, distortion); } +#if CONFIG_SUPERBLOCKS +static int64_t rd_pick_intra_sby_mode(VP8_COMP *cpi, + MACROBLOCK *x, + int *rate, + int *rate_tokenonly, + int *distortion) { + MB_PREDICTION_MODE mode; + MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); + int this_rate, this_rate_tokenonly; + int this_distortion; + int64_t best_rd = INT64_MAX, this_rd; + + /* Y Search for 32x32 intra prediction mode */ + for (mode = DC_PRED; mode <= TM_PRED; mode++) { + x->e_mbd.mode_info_context->mbmi.mode = mode; + RECON_INVOKE(&cpi->common.rtcd.recon, + build_intra_predictors_sby_s)(&x->e_mbd); + + super_block_yrd_8x8(x, &this_rate_tokenonly, + &this_distortion, IF_RTCD(&cpi->rtcd), NULL); + this_rate = this_rate_tokenonly + + x->mbmode_cost[x->e_mbd.frame_type] + [x->e_mbd.mode_info_context->mbmi.mode]; + this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); + + if (this_rd < best_rd) { + mode_selected = mode; + best_rd = this_rd; + *rate = this_rate; + *rate_tokenonly = this_rate_tokenonly; + *distortion = this_distortion; + } + } + + x->e_mbd.mode_info_context->mbmi.mode = mode_selected; + + return best_rd; +} +#endif static int64_t rd_pick_intra16x16mby_mode(VP8_COMP *cpi, MACROBLOCK *x, @@ -1372,18 +1476,23 @@ static int64_t rd_inter16x16_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate, return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); } -static int rd_cost_mbuv_8x8(MACROBLOCK *mb) { +static int rd_cost_mbuv_8x8(MACROBLOCK *mb, int backup) { int b; int cost = 0; MACROBLOCKD *xd = &mb->e_mbd; ENTROPY_CONTEXT_PLANES t_above, t_left; ENTROPY_CONTEXT *ta, *tl; - vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + if (backup) { + vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES)); - ta = (ENTROPY_CONTEXT *)&t_above; - tl = (ENTROPY_CONTEXT *)&t_left; + ta = (ENTROPY_CONTEXT *)&t_above; + tl = (ENTROPY_CONTEXT *)&t_left; + } else { + ta = (ENTROPY_CONTEXT *)mb->e_mbd.above_context; + tl = (ENTROPY_CONTEXT *)mb->e_mbd.left_context; + } for (b = 16; b < 24; b += 4) cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_UV, @@ -1393,6 +1502,54 @@ static int rd_cost_mbuv_8x8(MACROBLOCK *mb) { return cost; } +#if CONFIG_SUPERBLOCKS +static int64_t rd_inter32x32_uv_8x8(VP8_COMP *cpi, MACROBLOCK *x, int *rate, + int *distortion, int fullpixel, int *skip) { + MACROBLOCKD *xd = &x->e_mbd; + int n, r = 0, d = 0; + const uint8_t *usrc = x->src.u_buffer, *udst = xd->dst.u_buffer; + const uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer; + int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride; + int skippable = 1; + ENTROPY_CONTEXT_PLANES t_above[2], t_left[2]; + ENTROPY_CONTEXT_PLANES *ta = xd->above_context; + ENTROPY_CONTEXT_PLANES *tl = xd->left_context; + + memcpy(t_above, xd->above_context, sizeof(t_above)); + memcpy(t_left, xd->left_context, sizeof(t_left)); + + for (n = 0; n < 4; n++) { + int x_idx = n & 1, y_idx = n >> 1; + + vp8_subtract_mbuv_s_c(x->src_diff, + usrc + x_idx * 8 + y_idx * 8 * src_uv_stride, + vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride, + src_uv_stride, + udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, + vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride, + dst_uv_stride); + + vp8_transform_mbuv_8x8(x); + vp8_quantize_mbuv_8x8(x); + 
+ xd->above_context = ta + x_idx; + xd->left_context = tl + y_idx; + r += rd_cost_mbuv_8x8(x, 0); + d += ENCODEMB_INVOKE(&cpi->rtcd.encodemb, mbuverr)(x) / 4; + skippable = skippable && mbuv_is_skippable_8x8(xd); + } + + *rate = r; + *distortion = d; + if (skip) *skip = skippable; + xd->left_context = tl; + xd->above_context = ta; + memcpy(xd->above_context, t_above, sizeof(t_above)); + memcpy(xd->left_context, t_left, sizeof(t_left)); + + return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); +} +#endif static int64_t rd_inter16x16_uv_8x8(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *distortion, int fullpixel) { @@ -1403,7 +1560,7 @@ static int64_t rd_inter16x16_uv_8x8(VP8_COMP *cpi, MACROBLOCK *x, int *rate, vp8_quantize_mbuv_8x8(x); - *rate = rd_cost_mbuv_8x8(x); + *rate = rd_cost_mbuv_8x8(x, 1); *distortion = ENCODEMB_INVOKE(&cpi->rtcd.encodemb, mbuverr)(x) / 4; return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); @@ -1527,7 +1684,7 @@ static void rd_pick_intra_mbuv_mode_8x8(VP8_COMP *cpi, vp8_quantize_mbuv_8x8(x); - rate_to = rd_cost_mbuv_8x8(x); + rate_to = rd_cost_mbuv_8x8(x, 1); rate = rate_to + x->intra_uv_mode_cost[x->e_mbd.frame_type][mbmi->uv_mode]; distortion = ENCODEMB_INVOKE(&cpi->rtcd.encodemb, mbuverr)(x) / 4; @@ -1546,6 +1703,91 @@ static void rd_pick_intra_mbuv_mode_8x8(VP8_COMP *cpi, mbmi->uv_mode = mode_selected; } +#if CONFIG_SUPERBLOCKS +static void super_block_uvrd_8x8(MACROBLOCK *x, + int *rate, + int *distortion, + const VP8_ENCODER_RTCD *rtcd) { + MACROBLOCKD *const xd = &x->e_mbd; + int d = 0, r = 0, n; + const uint8_t *usrc = x->src.u_buffer, *udst = xd->dst.u_buffer; + const uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer; + int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride; + ENTROPY_CONTEXT_PLANES t_above[2], t_left[2]; + ENTROPY_CONTEXT_PLANES *ta = xd->above_context; + ENTROPY_CONTEXT_PLANES *tl = xd->left_context; + + memcpy(t_above, xd->above_context, sizeof(t_above)); + memcpy(t_left, xd->left_context, sizeof(t_left)); + + for (n = 0; n < 4; n++) { + int x_idx = n & 1, y_idx = n >> 1; + + vp8_subtract_mbuv_s_c(x->src_diff, + usrc + x_idx * 8 + y_idx * 8 * src_uv_stride, + vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride, + src_uv_stride, + udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, + vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride, + dst_uv_stride); + vp8_transform_mbuv_8x8(x); + vp8_quantize_mbuv_8x8(x); + + d += ENCODEMB_INVOKE(&rtcd->encodemb, mbuverr)(x) >> 2; + xd->above_context = ta + x_idx; + xd->left_context = tl + y_idx; + r += rd_cost_mbuv_8x8(x, 0); + } + + xd->above_context = ta; + xd->left_context = tl; + *distortion = (d >> 2); + *rate = r; + + xd->left_context = tl; + xd->above_context = ta; + memcpy(xd->above_context, t_above, sizeof(t_above)); + memcpy(xd->left_context, t_left, sizeof(t_left)); +} + +static int64_t rd_pick_intra_sbuv_mode(VP8_COMP *cpi, + MACROBLOCK *x, + int *rate, + int *rate_tokenonly, + int *distortion) { + MB_PREDICTION_MODE mode; + MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); + int64_t best_rd = INT64_MAX, this_rd; + int this_rate_tokenonly, this_rate; + int this_distortion; + + for (mode = DC_PRED; mode <= TM_PRED; mode++) { + x->e_mbd.mode_info_context->mbmi.uv_mode = mode; + RECON_INVOKE(&cpi->rtcd.common->recon, + build_intra_predictors_sbuv_s)(&x->e_mbd); + + super_block_uvrd_8x8(x, &this_rate_tokenonly, + &this_distortion, IF_RTCD(&cpi->rtcd)); + this_rate = this_rate_tokenonly + + x->mbmode_cost[x->e_mbd.frame_type] + [x->e_mbd.mode_info_context->mbmi.mode]; + 
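+ /* NOTE: super_block_uvrd_8x8() above shifts each quadrant's error by 2 and
+  * then shifts the accumulated sum by 2 again (net /16), while the inter
+  * variant rd_inter32x32_uv_8x8() divides only once (/4); one of the two
+  * scalings is presumably unintended. Also, the rate term just above
+  * charges x->mbmode_cost[...][mbmi.mode] (a Y-mode cost) while this loop
+  * searches uv_mode; an intra UV mode cost keyed on mbmi.uv_mode, as used
+  * by rd_pick_intra_mbuv_mode_8x8(), looks intended. */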
this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); + + if (this_rd < best_rd) { + mode_selected = mode; + best_rd = this_rd; + *rate = this_rate; + *rate_tokenonly = this_rate_tokenonly; + *distortion = this_distortion; + } + } + + x->e_mbd.mode_info_context->mbmi.uv_mode = mode_selected; + + return best_rd; +} +#endif + int vp8_cost_mv_ref(VP8_COMP *cpi, MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]) { @@ -2568,25 +2810,33 @@ static void vp8_estimate_ref_frame_costs(VP8_COMP *cpi, int segment_id, unsigned } } -static void store_coding_context(MACROBLOCK *x, int mb_index, +static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int mode_index, PARTITION_INFO *partition, int_mv *ref_mv, - int_mv *second_ref_mv) { + int_mv *second_ref_mv, + int single_pred_diff, + int comp_pred_diff, + int hybrid_pred_diff) { MACROBLOCKD *xd = &x->e_mbd; // Take a snapshot of the coding context so it can be // restored if we decide to encode this way - x->mb_context[mb_index].best_mode_index = mode_index; - vpx_memcpy(&x->mb_context[mb_index].mic, xd->mode_info_context, + ctx->best_mode_index = mode_index; + vpx_memcpy(&ctx->mic, xd->mode_info_context, sizeof(MODE_INFO)); - vpx_memcpy(&x->mb_context[mb_index].partition_info, partition, - sizeof(PARTITION_INFO)); - x->mb_context[mb_index].best_ref_mv.as_int = ref_mv->as_int; - x->mb_context[mb_index].second_best_ref_mv.as_int = second_ref_mv->as_int; - - // x->mb_context[mb_index].rddiv = x->rddiv; - // x->mb_context[mb_index].rdmult = x->rdmult; + if (partition) + vpx_memcpy(&ctx->partition_info, partition, + sizeof(PARTITION_INFO)); + ctx->best_ref_mv.as_int = ref_mv->as_int; + ctx->second_best_ref_mv.as_int = second_ref_mv->as_int; + + // ctx[mb_index].rddiv = x->rddiv; + // ctx[mb_index].rdmult = x->rdmult; + + ctx->single_pred_diff = single_pred_diff; + ctx->comp_pred_diff = comp_pred_diff; + ctx->hybrid_pred_diff = hybrid_pred_diff; } static void inter_mode_cost(VP8_COMP *cpi, MACROBLOCK *x, int this_mode, @@ -3464,7 +3714,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int } #endif - if (x->skip) + if (x->skip && !mode_excluded) break; } @@ -3557,16 +3807,36 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int } end: - // TODO Save these to add in only if MB coding mode is selected? 
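+ /* NOTE: the loop being deleted below is superseded: the per-mode
+  * prediction-cost differences are now snapshotted by store_coding_context()
+  * into the PICK_MODE_CONTEXT and only accumulated into
+  * cpi->rd_comp_pred_diff[] once the MB/SB coding choice is final (see the
+  * ctx->single_pred_diff / comp_pred_diff / hybrid_pred_diff additions at
+  * the top of this change). */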
- for (i = 0; i < NB_PREDICTION_TYPES; ++i) - cpi->rd_comp_pred_diff[i] += best_pred_diff[i]; + store_coding_context(x, &x->mb_context[xd->mb_index], best_mode_index, &best_partition, + &frame_best_ref_mv[xd->mode_info_context->mbmi.ref_frame], + &frame_best_ref_mv[xd->mode_info_context->mbmi.second_ref_frame], + best_pred_diff[0], best_pred_diff[1], best_pred_diff[2]); +} - store_coding_context(x, xd->mb_index, best_mode_index, &best_partition, - &frame_best_ref_mv[mbmi->ref_frame], - &frame_best_ref_mv[mbmi->second_ref_frame]); +#if CONFIG_SUPERBLOCKS +void vp8_rd_pick_intra_mode_sb(VP8_COMP *cpi, MACROBLOCK *x, + int *returnrate, + int *returndist) { + int rate_y, rate_uv; + int rate_y_tokenonly, rate_uv_tokenonly; + int error_y, error_uv; + int dist_y, dist_uv; + + x->e_mbd.mode_info_context->mbmi.txfm_size = TX_8X8; + + error_uv = rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, + &dist_uv); + error_y = rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, + &dist_y); + + // TODO(rbultje): add rate_uv + *returnrate = rate_y; + *returndist = dist_y + (dist_uv >> 2); } +#endif -int vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x) { +void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, + int *returnrate, int *returndist) { MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; int64_t error4x4, error16x16; @@ -3585,6 +3855,8 @@ int vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x) { int rate8x8, dist8x8; int mode16x16; int mode8x8[2][4]; + int dist; + int rateuv8, rateuv_tokenonly8, distuv8; mbmi->ref_frame = INTRA_FRAME; rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv); @@ -3646,9 +3918,11 @@ int vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x) { rate += rate4x4; #endif mbmi->mode = B_PRED; + dist = dist4x4; } else { mbmi->mode = mode16x16; rate += rate16x16; + dist = dist16x16; } } else { if (error4x4 < error8x8) { @@ -3663,17 +3937,727 @@ int vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x) { rate += rate4x4; #endif mbmi->mode = B_PRED; + dist = dist4x4; } else { mbmi->mode = I8X8_PRED; set_i8x8_block_modes(x, mode8x8); rate += rate8x8; + dist = dist8x8; } } - return rate; + + // TODO(rbultje): should add rateuv here also + *returnrate = rate - rateuv; + *returndist = dist + (distuv >> 2); } -int vp8cx_pick_mode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, - int recon_yoffset, int recon_uvoffset) { +#if CONFIG_SUPERBLOCKS +int64_t vp8_rd_pick_inter_mode_sb(VP8_COMP *cpi, MACROBLOCK *x, + int recon_yoffset, int recon_uvoffset, + int *returnrate, int *returndistortion) { + VP8_COMMON *cm = &cpi->common; + MACROBLOCKD *xd = &x->e_mbd; + BLOCK *b = &x->block[0]; + BLOCKD *d = &xd->block[0]; + MB_PREDICTION_MODE this_mode; + MV_REFERENCE_FRAME ref_frame; + int mis = xd->mode_info_stride; + unsigned char segment_id = xd->mode_info_context->mbmi.segment_id; + int comp_pred; + int_mv best_ref_mv, second_best_ref_mv; + int_mv mode_mv[MB_MODE_COUNT]; + int_mv frame_nearest_mv[4]; + int_mv frame_near_mv[4]; + int_mv frame_best_ref_mv[4]; + int_mv mc_search_result[4]; + int frame_mdcounts[4][4]; + unsigned char *y_buffer[4]; + unsigned char *u_buffer[4]; + unsigned char *v_buffer[4]; + static const int flag_list[4] = { 0, VP8_LAST_FLAG, VP8_GOLD_FLAG, VP8_ALT_FLAG }; + int idx_list[4] = { 0, cpi->common.lst_fb_idx, cpi->common.gld_fb_idx, cpi->common.alt_fb_idx }; + int mdcounts[4]; + int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7}; + int saddone = 0; + int sr = 0; // search range got from mv_pred(). 
It uses step_param levels. (0-7) + int64_t best_rd = INT64_MAX; + int64_t best_comp_rd = INT64_MAX; + int64_t best_single_rd = INT64_MAX; + int64_t best_hybrid_rd = INT64_MAX; + int64_t best_yrd = INT64_MAX; + MB_MODE_INFO best_mbmode; + int mode_index = 0; +#if 0 + PARTITION_INFO best_partition; + union b_mode_info best_bmodes[16]; +#endif + unsigned int ref_costs[MAX_REF_FRAMES]; + + xd->mode_info_context->mbmi.segment_id = segment_id; + vp8_estimate_ref_frame_costs(cpi, segment_id, ref_costs); + + for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { + if (cpi->ref_frame_flags & flag_list[ref_frame]) { + YV12_BUFFER_CONFIG *ref_buf = &cpi->common.yv12_fb[idx_list[ref_frame]]; + + vp8_find_near_mvs(xd, xd->mode_info_context, + xd->prev_mode_info_context, + &frame_nearest_mv[ref_frame], &frame_near_mv[ref_frame], + &frame_best_ref_mv[ref_frame], frame_mdcounts[ref_frame], + ref_frame, cpi->common.ref_frame_sign_bias); + + y_buffer[ref_frame] = ref_buf->y_buffer + recon_yoffset; + u_buffer[ref_frame] = ref_buf->u_buffer + recon_uvoffset; + v_buffer[ref_frame] = ref_buf->v_buffer + recon_uvoffset; + } + mc_search_result[ref_frame].as_int = INVALID_MV; + } + + for (mode_index = 0; mode_index < MAX_MODES; mode_index++) { + int_mv mvp; + int mode_excluded; + int64_t this_rd = INT64_MAX; + int disable_skip = 0; + int other_cost = 0; + int compmode_cost = 0; + int rate2 = 0; + int distortion2 = 0; + int rate_y = 0; + int rate_uv = 0; + int distortion_uv; + int distortion; + int skippable_y, skippable_uv; + + // Test best rd so far against threshold for trying this mode. + if (best_rd <= cpi->rd_threshes[mode_index]) { + continue; + } + + this_mode = vp8_mode_order[mode_index].mode; + ref_frame = vp8_mode_order[mode_index].ref_frame; + xd->mode_info_context->mbmi.ref_frame = ref_frame; + comp_pred = vp8_mode_order[mode_index].second_ref_frame != INTRA_FRAME; + xd->mode_info_context->mbmi.mode = this_mode; + xd->mode_info_context->mbmi.uv_mode = DC_PRED; +#if 0 && CONFIG_PRED_FILTER + xd->mode_info_context->mbmi.pred_filter_enabled = 0; +#endif + +#if 0 && CONFIG_COMP_INTRA_PRED + xd->mode_info_context->mbmi.second_mode = (MB_PREDICTION_MODE)(DC_PRED - 1); + xd->mode_info_context->mbmi.second_uv_mode = (MB_PREDICTION_MODE)(DC_PRED - 1); +#endif + + if (!(cpi->ref_frame_flags & flag_list[ref_frame])) + continue; + + // not yet supported or not superblocky + // TODO(rbultje): support intra coding + if (ref_frame == INTRA_FRAME || this_mode == SPLITMV) + continue; + + if (comp_pred) { + int second_ref; + + if (ref_frame == ALTREF_FRAME) { + second_ref = LAST_FRAME; + } else { + second_ref = ref_frame + 1; + } + if (!(cpi->ref_frame_flags & flag_list[second_ref])) + continue; + xd->mode_info_context->mbmi.second_ref_frame = second_ref; + + xd->second_pre.y_buffer = y_buffer[second_ref]; + xd->second_pre.u_buffer = u_buffer[second_ref]; + xd->second_pre.v_buffer = v_buffer[second_ref]; + second_best_ref_mv = frame_best_ref_mv[second_ref]; + mode_excluded = cm->comp_pred_mode == SINGLE_PREDICTION_ONLY; + } else { + xd->mode_info_context->mbmi.second_ref_frame = INTRA_FRAME; + mode_excluded = cm->comp_pred_mode == COMP_PREDICTION_ONLY; + } + + xd->pre.y_buffer = y_buffer[ref_frame]; + xd->pre.u_buffer = u_buffer[ref_frame]; + xd->pre.v_buffer = v_buffer[ref_frame]; + mode_mv[ZEROMV].as_int = 0; + mode_mv[NEARESTMV] = frame_nearest_mv[ref_frame]; + mode_mv[NEARMV] = frame_near_mv[ref_frame]; + best_ref_mv = frame_best_ref_mv[ref_frame]; + vpx_memcpy(mdcounts, frame_mdcounts[ref_frame], 
sizeof(mdcounts)); + + // If the segment reference frame feature is enabled.... + // then do nothing if the current ref frame is not allowed.. + if (segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) && + !check_segref(xd, segment_id, ref_frame)) { + continue; + } + // If the segment mode feature is enabled.... + // then do nothing if the current mode is not allowed.. + else if (segfeature_active(xd, segment_id, SEG_LVL_MODE) && + (this_mode != get_segdata(xd, segment_id, SEG_LVL_MODE))) { + continue; + } + // Disable this drop out case if either the mode or ref frame + // segment level feature is enabled for this segment. This is to + // prevent the possibility that we end up unable to pick any mode. + else if (!segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) && + !segfeature_active(xd, segment_id, SEG_LVL_MODE)) { + // Only consider ZEROMV/ALTREF_FRAME for alt ref frame, + // unless ARNR filtering is enabled in which case we want + // an unfiltered alternative + if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) { + if (this_mode != ZEROMV || ref_frame != ALTREF_FRAME) { + continue; + } + } + } + + if (!comp_pred) { + switch (this_mode) { + case NEWMV: { + int thissme; + int bestsme = INT_MAX; + int step_param = cpi->sf.first_step; + int further_steps; + int n; + int do_refine = 1; /* If last step (1-away) of n-step search doesn't pick the center point as the best match, + we will do a final 1-away diamond refining search */ + int num00; + + int sadpb = x->sadperbit16; + int_mv mvp_full; + + int col_min = (best_ref_mv.as_mv.col >> 3) - MAX_FULL_PEL_VAL + ((best_ref_mv.as_mv.col & 7) ? 1 : 0); + int row_min = (best_ref_mv.as_mv.row >> 3) - MAX_FULL_PEL_VAL + ((best_ref_mv.as_mv.row & 7) ? 1 : 0); + int col_max = (best_ref_mv.as_mv.col >> 3) + MAX_FULL_PEL_VAL; + int row_max = (best_ref_mv.as_mv.row >> 3) + MAX_FULL_PEL_VAL; + + int tmp_col_min = x->mv_col_min; + int tmp_col_max = x->mv_col_max; + int tmp_row_min = x->mv_row_min; + int tmp_row_max = x->mv_row_max; + + if (!saddone) { + vp8_cal_sad(cpi, xd, x, recon_yoffset, &near_sadidx[0]); + saddone = 1; + } + + vp8_mv_pred(cpi, xd, xd->mode_info_context, &mvp, + xd->mode_info_context->mbmi.ref_frame, + cpi->common.ref_frame_sign_bias, &sr, &near_sadidx[0]); + + mvp_full.as_mv.col = mvp.as_mv.col >> 3; + mvp_full.as_mv.row = mvp.as_mv.row >> 3; + + // Get intersection of UMV window and valid MV window to reduce # of checks in diamond search. + if (x->mv_col_min < col_min) + x->mv_col_min = col_min; + if (x->mv_col_max > col_max) + x->mv_col_max = col_max; + if (x->mv_row_min < row_min) + x->mv_row_min = row_min; + if (x->mv_row_max > row_max) + x->mv_row_max = row_max; + + // adjust search range according to sr from mv prediction + if (sr > step_param) + step_param = sr; + + // Initial step/diamond search + { + bestsme = cpi->diamond_search_sad(x, b, d, &mvp_full, &d->bmi.as_mv.first, + step_param, sadpb, &num00, + &cpi->fn_ptr[BLOCK_32X32], + XMVCOST, &best_ref_mv); + mode_mv[NEWMV].as_int = d->bmi.as_mv.first.as_int; + + // Further step/diamond searches as necessary + n = 0; + further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; + + n = num00; + num00 = 0; + + /* If there won't be more n-step search, check to see if refining search is needed.
+            if (n > further_steps)
+              do_refine = 0;
+
+            while (n < further_steps) {
+              n++;
+
+              if (num00)
+                num00--;
+              else {
+                thissme = cpi->diamond_search_sad(x, b, d, &mvp_full,
+                                                  &d->bmi.as_mv.first, step_param + n, sadpb, &num00,
+                                                  &cpi->fn_ptr[BLOCK_32X32],
+                                                  XMVCOST, &best_ref_mv);
+
+                /* check to see if refining search is needed. */
+                if (num00 > (further_steps - n))
+                  do_refine = 0;
+
+                if (thissme < bestsme) {
+                  bestsme = thissme;
+                  mode_mv[NEWMV].as_int = d->bmi.as_mv.first.as_int;
+                } else {
+                  d->bmi.as_mv.first.as_int = mode_mv[NEWMV].as_int;
+                }
+              }
+            }
+          }
+
+          /* final 1-away diamond refining search */
+          if (do_refine == 1) {
+            int search_range;
+
+            // This is not an ideal way to set search_range; it needs further investigation.
+            // search_range = MAXF(abs((mvp.row>>3) - d->bmi.mv.as_mv.row), abs((mvp.col>>3) - d->bmi.mv.as_mv.col));
+            search_range = 8;
+
+            thissme = cpi->refining_search_sad(x, b, d, &d->bmi.as_mv.first, sadpb,
+                                               search_range, &cpi->fn_ptr[BLOCK_32X32],
+                                               XMVCOST, &best_ref_mv);
+
+            if (thissme < bestsme) {
+              bestsme = thissme;
+              mode_mv[NEWMV].as_int = d->bmi.as_mv.first.as_int;
+            } else {
+              d->bmi.as_mv.first.as_int = mode_mv[NEWMV].as_int;
+            }
+          }
+
+          x->mv_col_min = tmp_col_min;
+          x->mv_col_max = tmp_col_max;
+          x->mv_row_min = tmp_row_min;
+          x->mv_row_max = tmp_row_max;
+
+          if (bestsme < INT_MAX) {
+            int dis; /* TODO: use dis in distortion calculation later. */
+            unsigned int sse;
+            cpi->find_fractional_mv_step(x, b, d, &d->bmi.as_mv.first, &best_ref_mv,
+                                         x->errorperbit,
+                                         &cpi->fn_ptr[BLOCK_32X32],
+                                         XMVCOST, &dis, &sse);
+          }
+          mc_search_result[xd->mode_info_context->mbmi.ref_frame].as_int =
+            d->bmi.as_mv.first.as_int;
+
+          mode_mv[NEWMV].as_int = d->bmi.as_mv.first.as_int;
+
+          // Add the new motion vector cost to our rolling cost variable
+          rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv,
+                                   XMVCOST, 96,
+                                   xd->allow_high_precision_mv);
+        }
+
+        case NEARESTMV:
+        case NEARMV:
+          // Clip "next_nearest" so that it does not extend too far out of the image
+          vp8_clamp_mv2(&mode_mv[this_mode], xd);
+
+          // Do not bother proceeding if the vector (from newmv, nearest or near)
+          // is 0,0, as this should then be coded using the zeromv mode.
+          if (((this_mode == NEARMV) || (this_mode == NEARESTMV)) && (mode_mv[this_mode].as_int == 0)) {
+            continue;
+          }
+
+        case ZEROMV:
+          // Trap vectors that reach beyond the UMV borders.
+          // Note that ALL New MV, Nearest MV, Near MV and Zero MV code drops through to this point
+          // because of the lack of break statements in the previous two cases.
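+          // Motion vectors are stored here in 1/8-pel units, so the ">> 3"
+          // below reduces each component to whole pels before it is checked
+          // against the whole-pel UMV window limits.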
+          if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) || ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
+              ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) || ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) {
+            continue;
+          }
+
+          vp8_set_mbmode_and_mvs(x, this_mode, &mode_mv[this_mode]);
+
+#if CONFIG_PRED_FILTER
+          // Filtered prediction:
+          xd->mode_info_context->mbmi.pred_filter_enabled =
+            vp8_mode_order[mode_index].pred_filter_flag;
+          rate2 += vp8_cost_bit(cpi->common.prob_pred_filter_off,
+                                xd->mode_info_context->mbmi.pred_filter_enabled);
+#endif
+
+          vp8_build_inter32x32_predictors_sb(xd,
+                                             xd->dst.y_buffer,
+                                             xd->dst.u_buffer,
+                                             xd->dst.v_buffer,
+                                             xd->dst.y_stride,
+                                             xd->dst.uv_stride);
+
+          compmode_cost =
+            vp8_cost_bit(get_pred_prob(cm, xd, PRED_COMP), 0);
+
+          if (cpi->active_map_enabled && x->active_ptr[0] == 0) {
+            x->skip = 1;
+          } else if (x->encode_breakout) {
+            unsigned int sse;
+            unsigned int var;
+            int threshold = (xd->block[0].dequant[1] *
+                             xd->block[0].dequant[1] >> 4);
+
+            if (threshold < x->encode_breakout)
+              threshold = x->encode_breakout;
+
+            var = VARIANCE_INVOKE(&cpi->rtcd.variance, var32x32)(*(b->base_src),
+                                                                 b->src_stride, xd->dst.y_buffer, xd->dst.y_stride, &sse);
+
+            if (sse < threshold) {
+              unsigned int q2dc = xd->block[24].dequant[0];
+              /* If there is no codeable 2nd order dc
+                 or a very small uniform pixel change */
+              if ((sse - var < q2dc * q2dc >> 4) ||
+                  (sse / 2 > var && sse - var < 64)) {
+                // Check u and v to make sure skip is ok
+                int sse2, sse3;
+                int var2 = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16)
+                           (x->src.u_buffer, x->src.uv_stride,
+                            xd->dst.u_buffer, xd->dst.uv_stride, &sse2);
+                int var3 = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16)
+                           (x->src.v_buffer, x->src.uv_stride,
+                            xd->dst.v_buffer, xd->dst.uv_stride, &sse3);
+                sse2 += sse3;
+                if (sse2 * 2 < threshold) {
+                  x->skip = 1;
+                  distortion2 = sse + sse2;
+                  rate2 = 500;
+
+                  /* for best_yrd calculation */
+                  rate_uv = 0;
+                  distortion_uv = sse2;
+
+                  disable_skip = 1;
+                  this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
+                  break;
+                }
+              }
+            }
+          }
+
+          // Add in the Mv/mode cost
+          rate2 += vp8_cost_mv_ref(cpi, this_mode, mdcounts);
+
+          // Y cost and distortion - FIXME support other transform sizes
+          super_block_yrd_8x8(x, &rate_y, &distortion,
+                              IF_RTCD(&cpi->rtcd), &skippable_y);
+          rate2 += rate_y;
+          distortion2 += distortion;
+
+          rd_inter32x32_uv_8x8(cpi, x, &rate_uv, &distortion_uv,
+                               cpi->common.full_pixel, &skippable_uv);
+
+          rate2 += rate_uv;
+          distortion2 += distortion_uv;
+          mode_excluded = cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY;
+          break;
+
+        default:
+          break;
+      }
+    } else { /* xd->mode_info_context->mbmi.second_ref_frame != 0 */
+      int ref1 = xd->mode_info_context->mbmi.ref_frame;
+      int ref2 = xd->mode_info_context->mbmi.second_ref_frame;
+
+      mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY;
+      switch (this_mode) {
+        case NEWMV:
+          if (mc_search_result[ref1].as_int == INVALID_MV ||
+              mc_search_result[ref2].as_int == INVALID_MV)
+            continue;
+          xd->mode_info_context->mbmi.mv[0].as_int = mc_search_result[ref1].as_int;
+          xd->mode_info_context->mbmi.mv[1].as_int = mc_search_result[ref2].as_int;
+          rate2 += vp8_mv_bit_cost(&mc_search_result[ref1],
+                                   &frame_best_ref_mv[ref1],
+                                   XMVCOST, 96,
+                                   xd->allow_high_precision_mv);
+          rate2 += vp8_mv_bit_cost(&mc_search_result[ref2],
+                                   &frame_best_ref_mv[ref2],
+                                   XMVCOST, 96,
+                                   xd->allow_high_precision_mv);
+          break;
+        case ZEROMV:
+          xd->mode_info_context->mbmi.mv[0].as_int = 0;
+          xd->mode_info_context->mbmi.mv[1].as_int = 0;
+          break;
+        case NEARMV:
+          if (frame_near_mv[ref1].as_int == 0 || frame_near_mv[ref2].as_int == 0) {
+            continue;
+          }
+          xd->mode_info_context->mbmi.mv[0].as_int = frame_near_mv[ref1].as_int;
+          xd->mode_info_context->mbmi.mv[1].as_int = frame_near_mv[ref2].as_int;
+          break;
+        case NEARESTMV:
+          if (frame_nearest_mv[ref1].as_int == 0 || frame_nearest_mv[ref2].as_int == 0) {
+            continue;
+          }
+          xd->mode_info_context->mbmi.mv[0].as_int = frame_nearest_mv[ref1].as_int;
+          xd->mode_info_context->mbmi.mv[1].as_int = frame_nearest_mv[ref2].as_int;
+          break;
+        default:
+          break;
+      }
+
+      /* Add in the Mv/mode cost */
+      rate2 += vp8_cost_mv_ref(cpi, this_mode, mdcounts);
+
+      vp8_clamp_mv2(&xd->mode_info_context->mbmi.mv[0], xd);
+      vp8_clamp_mv2(&xd->mode_info_context->mbmi.mv[1], xd);
+      if (((xd->mode_info_context->mbmi.mv[0].as_mv.row >> 3) < x->mv_row_min) ||
+          ((xd->mode_info_context->mbmi.mv[0].as_mv.row >> 3) > x->mv_row_max) ||
+          ((xd->mode_info_context->mbmi.mv[0].as_mv.col >> 3) < x->mv_col_min) ||
+          ((xd->mode_info_context->mbmi.mv[0].as_mv.col >> 3) > x->mv_col_max) ||
+          ((xd->mode_info_context->mbmi.mv[1].as_mv.row >> 3) < x->mv_row_min) ||
+          ((xd->mode_info_context->mbmi.mv[1].as_mv.row >> 3) > x->mv_row_max) ||
+          ((xd->mode_info_context->mbmi.mv[1].as_mv.col >> 3) < x->mv_col_min) ||
+          ((xd->mode_info_context->mbmi.mv[1].as_mv.col >> 3) > x->mv_col_max)) {
+        continue;
+      }
+
+      /* build first and second prediction */
+      vp8_build_inter32x32_predictors_sb(xd, xd->dst.y_buffer,
+                                         xd->dst.u_buffer, xd->dst.v_buffer,
+                                         xd->dst.y_stride, xd->dst.uv_stride);
+
+      /* Y cost and distortion - TODO(rbultje) support other transform sizes */
+      super_block_yrd_8x8(x, &rate_y, &distortion,
+                          IF_RTCD(&cpi->rtcd), &skippable_y);
+
+      rate2 += rate_y;
+      distortion2 += distortion;
+
+      /* UV cost and distortion */
+      rd_inter32x32_uv_8x8(cpi, x, &rate_uv, &distortion_uv,
+                           cpi->common.full_pixel, &skippable_uv);
+
+      rate2 += rate_uv;
+      distortion2 += distortion_uv;
+
+      /* don't bother w/ skip, we would never have come here if skip were
+       * enabled */
+      xd->mode_info_context->mbmi.mode = this_mode;
+
+      /* We don't include the cost of the second reference here, because there
+       * are only three options: Last/Golden, ARF/Last or Golden/ARF, or in
+       * other words if you present them in that order, the second one is
+       * always known if the first is known */
+      compmode_cost = vp8_cost_bit(get_pred_prob(cm, xd, PRED_COMP), 1);
+    }
+
+    if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
+      rate2 += compmode_cost;
+    }
+
+    // Estimate the reference frame signaling cost and add it
+    // to the rolling cost variable.
+    rate2 += ref_costs[xd->mode_info_context->mbmi.ref_frame];
+
+    if (!disable_skip) {
+      // Test for the condition where skip block will be activated
+      // because there are no non-zero coefficients and make any
+      // necessary adjustment for rate. Ignore if skip is coded at
+      // segment level as the cost won't have been added in.
+      if (cpi->common.mb_no_coeff_skip) {
+        int mb_skippable = skippable_y && skippable_uv;
+        int mb_skip_allowed;
+
+        // Is Mb level skip allowed for this mb.
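+        // Skip is allowed either when no segment-level EOB feature is active,
+        // or when the segment EOB is non-zero; a segment EOB forced to zero
+        // already implies skip, so no flag would be coded for it.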
+        mb_skip_allowed =
+          !segfeature_active(xd, segment_id, SEG_LVL_EOB) ||
+          get_segdata(xd, segment_id, SEG_LVL_EOB);
+
+        if (mb_skippable) {
+          // Back out the coefficient coding costs
+          rate2 -= (rate_y + rate_uv);
+          // for best_yrd calculation
+          rate_uv = 0;
+
+          if (mb_skip_allowed) {
+            int prob_skip_cost;
+
+            // Cost the skip mb case
+            vp8_prob skip_prob =
+              get_pred_prob(cm, xd, PRED_MBSKIP);
+
+            if (skip_prob) {
+              prob_skip_cost = vp8_cost_bit(skip_prob, 1);
+              rate2 += prob_skip_cost;
+              other_cost += prob_skip_cost;
+            }
+          }
+        }
+        // Add in the cost of the no skip flag.
+        else if (mb_skip_allowed) {
+          int prob_skip_cost = vp8_cost_bit(get_pred_prob(cm, xd,
+                                                          PRED_MBSKIP), 0);
+          rate2 += prob_skip_cost;
+          other_cost += prob_skip_cost;
+        }
+      }
+
+      // Calculate the final RD estimate for this mode.
+      this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
+    }
+
+#if 0
+    // Keep record of best intra distortion
+    if ((xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) &&
+        (this_rd < best_intra_rd)) {
+      best_intra_rd = this_rd;
+      *returnintra = distortion2;
+    }
+#endif
+
+    if (!disable_skip && xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
+      if (this_rd < best_comp_rd)
+        best_comp_rd = this_rd;
+      if (this_rd < best_single_rd)
+        best_single_rd = this_rd;
+      if (this_rd < best_hybrid_rd)
+        best_hybrid_rd = this_rd;
+    }
+
+    // Did this mode help, i.e. is it the new best mode?
+    if (this_rd < best_rd || x->skip) {
+      if (!mode_excluded) {
+#if 0
+        // Note index of best mode so far
+        best_mode_index = mode_index;
+
+        if (this_mode <= B_PRED) {
+          xd->mode_info_context->mbmi.uv_mode = uv_intra_mode_8x8;
+          /* required for left and above block mv */
+          xd->mode_info_context->mbmi.mv.as_int = 0;
+        }
+#endif
+
+        other_cost += ref_costs[xd->mode_info_context->mbmi.ref_frame];
+
+        /* Calculate the final y RD estimate for this mode */
+        best_yrd = RDCOST(x->rdmult, x->rddiv, (rate2 - rate_uv - other_cost),
+                          (distortion2 - distortion_uv));
+
+        *returnrate = rate2;
+        *returndistortion = distortion2;
+        best_rd = this_rd;
+        vpx_memcpy(&best_mbmode, &xd->mode_info_context->mbmi, sizeof(MB_MODE_INFO));
+      }
+#if 0
+      // Testing this mode gave rise to an improvement in best error score.
+      // Lower threshold a bit for next time.
+      cpi->rd_thresh_mult[mode_index] = (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT;
+      cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index];
+#endif
+    }
+    // If the mode did not help improve the best error case, then raise the
+    // threshold for testing that mode next time around.
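+    // (The threshold adaptation below is currently compiled out; see the
+    // TODO(rbultje) note further down about integrating these superblock
+    // modes with RD thresholding.)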
+    else {
+#if 0
+      cpi->rd_thresh_mult[mode_index] += 4;
+
+      if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
+        cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
+
+      cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index];
+#endif
+    }
+
+    /* keep record of best compound/single-only prediction */
+    if (!disable_skip && xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME) {
+      int single_rd, hybrid_rd, single_rate, hybrid_rate;
+
+      if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
+        single_rate = rate2 - compmode_cost;
+        hybrid_rate = rate2;
+      } else {
+        single_rate = rate2;
+        hybrid_rate = rate2 + compmode_cost;
+      }
+
+      single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
+      hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
+
+      if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME &&
+          single_rd < best_single_rd) {
+        best_single_rd = single_rd;
+      } else if (xd->mode_info_context->mbmi.second_ref_frame != INTRA_FRAME &&
+                 single_rd < best_comp_rd) {
+        best_comp_rd = single_rd;
+      }
+      if (hybrid_rd < best_hybrid_rd) {
+        best_hybrid_rd = hybrid_rd;
+      }
+    }
+
+    if (x->skip && !mode_excluded)
+      break;
+  }
+
+  // TODO(rbultje) integrate with RD thresholding
+#if 0
+  // Reduce the activation RD thresholds for the best choice mode
+  if ((cpi->rd_baseline_thresh[best_mode_index] > 0) &&
+      (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) {
+    int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 2);
+
+    cpi->rd_thresh_mult[best_mode_index] =
+      (cpi->rd_thresh_mult[best_mode_index] >= (MIN_THRESHMULT + best_adjustment)) ?
+      cpi->rd_thresh_mult[best_mode_index] - best_adjustment : MIN_THRESHMULT;
+    cpi->rd_threshes[best_mode_index] =
+      (cpi->rd_baseline_thresh[best_mode_index] >> 7) * cpi->rd_thresh_mult[best_mode_index];
+  }
+#endif
+
+  // This code forces Altref,0,0 and skip for the frame that overlays
+  // an altref, unless the altref is filtered. However, this is unsafe if
+  // segment level coding of ref frame or mode is enabled for this
+  // segment.
+  if (!segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
+      !segfeature_active(xd, segment_id, SEG_LVL_MODE) &&
+      cpi->is_src_frame_alt_ref &&
+      (cpi->oxcf.arnr_max_frames == 0) &&
+      (best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME)) {
+    xd->mode_info_context->mbmi.mode = ZEROMV;
+    xd->mode_info_context->mbmi.ref_frame = ALTREF_FRAME;
+    xd->mode_info_context->mbmi.mv[0].as_int = 0;
+    xd->mode_info_context->mbmi.uv_mode = DC_PRED;
+    xd->mode_info_context->mbmi.mb_skip_coeff =
+      (cpi->common.mb_no_coeff_skip) ? 1 : 0;
+    xd->mode_info_context->mbmi.partitioning = 0;
+
+    xd->mode_info_context->mbmi.txfm_size = TX_8X8;
+
+    if (best_rd != INT64_MAX)
+      store_coding_context(x, &x->sb_context[0], mode_index, NULL,
+                           &frame_best_ref_mv[xd->mode_info_context->mbmi.ref_frame],
+                           &frame_best_ref_mv[xd->mode_info_context->mbmi.second_ref_frame],
+                           0, 0, 0);
+    return best_rd;
+  }
+
+  // macroblock modes
+  vpx_memcpy(&xd->mode_info_context->mbmi, &best_mbmode,
+             sizeof(MB_MODE_INFO));
+  xd->mode_info_context->mbmi.txfm_size = TX_8X8;
+
+  if (best_rd != INT64_MAX)
+    store_coding_context(x, &x->sb_context[0], mode_index, NULL,
+                         &frame_best_ref_mv[xd->mode_info_context->mbmi.ref_frame],
+                         &frame_best_ref_mv[xd->mode_info_context->mbmi.second_ref_frame],
+                         (best_single_rd == INT64_MAX) ? INT_MIN : (best_rd - best_single_rd),
+                         (best_comp_rd == INT64_MAX) ? INT_MIN : (best_rd - best_comp_rd),
+                         (best_hybrid_rd == INT64_MAX) ? INT_MIN : (best_rd - best_hybrid_rd));
+
+  return best_rd;
+}
+#endif
+
+void vp8cx_pick_mode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x,
+                                      int recon_yoffset,
+                                      int recon_uvoffset,
+                                      int *totalrate, int *totaldist) {
   VP8_COMMON *cm = &cpi->common;
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
@@ -3694,17 +4678,6 @@ int vp8cx_pick_mode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x,
     vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate,
                            &distortion, &intra_error);
 
-    if (mbmi->ref_frame) {
-      unsigned char pred_context;
-
-      pred_context = get_pred_context(cm, xd, PRED_COMP);
-
-      if (mbmi->second_ref_frame == INTRA_FRAME)
-        cpi->single_pred_count[pred_context]++;
-      else
-        cpi->comp_pred_count[pred_context]++;
-    }
-
     /* restore cpi->zbin_mode_boost_enabled */
     cpi->zbin_mode_boost_enabled = zbin_mode_boost_enabled;
   }
@@ -3717,5 +4690,6 @@ int vp8cx_pick_mode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x,
   x->mb_context[xd->mb_index].distortion = distortion;
   x->mb_context[xd->mb_index].intra_error = intra_error;
 
-  return rate;
+  *totalrate = rate;
+  *totaldist = distortion;
 }
diff --git a/vp8/encoder/rdopt.h b/vp8/encoder/rdopt.h
index 2b5928de9593a01a54c3ba7610c496aff5795c37..0e36a519deaadf3fe1a6687b0be1b4c0c9f79ef4 100644
--- a/vp8/encoder/rdopt.h
+++ b/vp8/encoder/rdopt.h
@@ -18,7 +18,8 @@ extern void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue);
 extern void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
                                    int recon_uvoffset, int *returnrate,
                                    int *returndistortion, int64_t *returnintra);
-extern int vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x);
+extern void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *r, int *d);
+extern void vp8_rd_pick_intra_mode_sb(VP8_COMP *cpi, MACROBLOCK *x, int *r, int *d);
 
 extern void vp8_mv_pred
 (
diff --git a/vp8/encoder/sad_c.c b/vp8/encoder/sad_c.c
index 78a87f392c0798b3ff26678e41e5c37f0778c253..4fdfd11862040cbe3639ca28c52f10b0b1157dad 100644
--- a/vp8/encoder/sad_c.c
+++ b/vp8/encoder/sad_c.c
@@ -13,29 +13,6 @@
 #include "vpx_ports/config.h"
 #include "vpx/vpx_integer.h"
 
-unsigned int vp8_sad16x16_c(
-  const unsigned char *src_ptr,
-  int src_stride,
-  const unsigned char *ref_ptr,
-  int ref_stride,
-  int max_sad) {
-
-  int r, c;
-  unsigned int sad = 0;
-
-  for (r = 0; r < 16; r++) {
-    for (c = 0; c < 16; c++) {
-      sad += abs(src_ptr[c] - ref_ptr[c]);
-    }
-
-    src_ptr += src_stride;
-    ref_ptr += ref_stride;
-  }
-
-  return sad;
-}
-
-
 static __inline
 unsigned int sad_mx_n_c(
   const unsigned char *src_ptr,
@@ -60,6 +37,21 @@ unsigned int sad_mx_n_c(
   return sad;
 }
 
+unsigned int vp8_sad32x32_c(const unsigned char *src_ptr,
+                            int src_stride,
+                            const unsigned char *ref_ptr,
+                            int ref_stride,
+                            int max_sad) {
+  return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 32, 32);
+}
+
+unsigned int vp8_sad16x16_c(const unsigned char *src_ptr,
+                            int src_stride,
+                            const unsigned char *ref_ptr,
+                            int ref_stride,
+                            int max_sad) {
+  return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 16, 16);
+}
 
 unsigned int vp8_sad8x8_c(
   const unsigned char *src_ptr,
   int src_stride,
   const unsigned char *ref_ptr,
   int ref_stride,
   int max_sad) {
   return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 8, 8);
 }
@@ -104,6 +96,7 @@ unsigned int vp8_sad4x4_c(
   return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 4, 4);
 }
 
+
 #if CONFIG_NEWBESTREFMV
 unsigned int vp8_sad2x16_c(
   const unsigned char *src_ptr,
   int src_stride,
   const unsigned char *ref_ptr,
   int ref_stride,
   int max_sad) {
   return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 2, 16);
 }
@@ -122,6 +115,34 @@ unsigned int vp8_sad16x2_c(
   return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 16, 2);
 }
 #endif
+
+void vp8_sad32x32x3_c(const unsigned char *src_ptr,
+                      int src_stride,
+                      const unsigned char *ref_ptr,
+                      int ref_stride,
+                      unsigned int *sad_array
+                      ) {
+  sad_array[0] = vp8_sad32x32_c(src_ptr, src_stride, ref_ptr, ref_stride, 0x7fffffff);
+  sad_array[1] = vp8_sad32x32_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff);
+  sad_array[2] = vp8_sad32x32_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
+}
+
+void vp8_sad32x32x8_c(const unsigned char *src_ptr,
+                      int src_stride,
+                      const unsigned char *ref_ptr,
+                      int ref_stride,
+                      unsigned short *sad_array
+                      ) {
+  sad_array[0] = (unsigned short)vp8_sad32x32_c(src_ptr, src_stride, ref_ptr, ref_stride, 0x7fffffff);
+  sad_array[1] = (unsigned short)vp8_sad32x32_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff);
+  sad_array[2] = (unsigned short)vp8_sad32x32_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
+  sad_array[3] = (unsigned short)vp8_sad32x32_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, 0x7fffffff);
+  sad_array[4] = (unsigned short)vp8_sad32x32_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff);
+  sad_array[5] = (unsigned short)vp8_sad32x32_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff);
+  sad_array[6] = (unsigned short)vp8_sad32x32_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, 0x7fffffff);
+  sad_array[7] = (unsigned short)vp8_sad32x32_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff);
+}
+
 void vp8_sad16x16x3_c(
   const unsigned char *src_ptr,
   int src_stride,
@@ -267,6 +288,18 @@ void vp8_sad4x4x8_c(
   sad_array[7] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff);
 }
 
+void vp8_sad32x32x4d_c(const unsigned char *src_ptr,
+                       int src_stride,
+                       unsigned char *ref_ptr[],
+                       int ref_stride,
+                       unsigned int *sad_array
+                       ) {
+  sad_array[0] = vp8_sad32x32_c(src_ptr, src_stride, ref_ptr[0], ref_stride, 0x7fffffff);
+  sad_array[1] = vp8_sad32x32_c(src_ptr, src_stride, ref_ptr[1], ref_stride, 0x7fffffff);
+  sad_array[2] = vp8_sad32x32_c(src_ptr, src_stride, ref_ptr[2], ref_stride, 0x7fffffff);
+  sad_array[3] = vp8_sad32x32_c(src_ptr, src_stride, ref_ptr[3], ref_stride, 0x7fffffff);
+}
+
 void vp8_sad16x16x4d_c(
   const unsigned char *src_ptr,
   int src_stride,
diff --git a/vp8/encoder/segmentation.c b/vp8/encoder/segmentation.c
index e9d02cdd41f32e0bf62bdfa802e9fdfdcd942a19..e88b80d3409395a906ed0bc067b18415efbe025e 100644
--- a/vp8/encoder/segmentation.c
+++ b/vp8/encoder/segmentation.c
@@ -200,42 +200,59 @@ void choose_segmap_coding_method(VP8_COMP *cpi) {
   // in the frame
   xd->mode_info_context = cm->mi;
 
-  for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
-    for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
-      segment_id = xd->mode_info_context->mbmi.segment_id;
-
-      // Count the number of hits on each segment with no prediction
-      no_pred_segcounts[segment_id]++;
-
-      // Temporal prediction not allowed on key frames
-      if (cm->frame_type != KEY_FRAME) {
-        // Test to see if the segment id matches the predicted value.
-        int seg_predicted =
-          (segment_id == get_pred_mb_segid(cm, segmap_index));
-
-        // Get the segment id prediction context
-        pred_context =
-          get_pred_context(cm, xd, PRED_SEG_ID);
-
-        // Store the prediction status for this mb and update counts
-        // as appropriate
-        set_pred_flag(xd, PRED_SEG_ID, seg_predicted);
-        temporal_predictor_count[pred_context][seg_predicted]++;
-
-        if (!seg_predicted)
-          // Update the "unpredicted" segment count
-          t_unpred_seg_counts[segment_id]++;
-      }
+  for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 2) {
+    for (mb_col = 0; mb_col < cm->mb_cols; mb_col += 2) {
+      for (i = 0; i < 4; i++) {
+        static const int dx[4] = { +1, -1, +1, +1 };
+        static const int dy[4] = {  0, +1,  0, -1 };
+        int x_idx = i & 1, y_idx = i >> 1;
+
+        if (mb_col + x_idx >= cm->mb_cols ||
+            mb_row + y_idx >= cm->mb_rows) {
+          goto end;
+        }
+
+        segmap_index = (mb_row + y_idx) * cm->mb_cols + mb_col + x_idx;
+        segment_id = xd->mode_info_context->mbmi.segment_id;
+
+        // Count the number of hits on each segment with no prediction
+        no_pred_segcounts[segment_id]++;
+
+        // Temporal prediction not allowed on key frames
+        if (cm->frame_type != KEY_FRAME) {
+          // Test to see if the segment id matches the predicted value.
+          int seg_predicted =
+            (segment_id == get_pred_mb_segid(cm, segmap_index));
 
-      // Step on to the next mb
-      xd->mode_info_context++;
+          // Get the segment id prediction context
+          pred_context =
+            get_pred_context(cm, xd, PRED_SEG_ID);
 
-      // Step on to the next entry in the segment maps
-      segmap_index++;
+          // Store the prediction status for this mb and update counts
+          // as appropriate
+          set_pred_flag(xd, PRED_SEG_ID, seg_predicted);
+          temporal_predictor_count[pred_context][seg_predicted]++;
+
+          if (!seg_predicted)
+            // Update the "unpredicted" segment count
+            t_unpred_seg_counts[segment_id]++;
+        }
+
+#if CONFIG_SUPERBLOCKS
+        if (xd->mode_info_context->mbmi.encoded_as_sb) {
+          assert(!i);
+          xd->mode_info_context += 2;
+          break;
+        }
+#endif
+      end:
+        xd->mode_info_context += dx[i] + dy[i] * cm->mode_info_stride;
+      }
     }
 
     // this is to account for the border in mode_info_context
-    xd->mode_info_context++;
+    xd->mode_info_context -= mb_col;
+    xd->mode_info_context += cm->mode_info_stride * 2;
   }
 
   // Work out probability tree for coding segments without prediction
diff --git a/vp8/encoder/variance.h b/vp8/encoder/variance.h
index 73a0a6b99fdcd3b26239fa3c331e892af45190e5..e17733c58f3eeb4736137a4249b19a5918d452bd 100644
--- a/vp8/encoder/variance.h
+++ b/vp8/encoder/variance.h
@@ -145,8 +145,18 @@ extern prototype_sad(vp8_variance_sad16x8);
 #endif
 extern prototype_sad(vp8_variance_sad16x16);
 
+#ifndef vp8_variance_sad32x32
+#define vp8_variance_sad32x32 vp8_sad32x32_c
+#endif
+extern prototype_sad(vp8_variance_sad32x32);
+
 // -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
 
+#ifndef vp8_variance_sad32x32x3
+#define vp8_variance_sad32x32x3 vp8_sad32x32x3_c
+#endif
+extern prototype_sad_multi_same_address(vp8_variance_sad32x32x3);
+
 #ifndef vp8_variance_sad16x16x3
 #define vp8_variance_sad16x16x3 vp8_sad16x16x3_c
 #endif
@@ -172,6 +182,11 @@ extern prototype_sad_multi_same_address(vp8_variance_sad8x16x3);
 #endif
 extern prototype_sad_multi_same_address(vp8_variance_sad4x4x3);
 
+#ifndef vp8_variance_sad32x32x8
+#define vp8_variance_sad32x32x8 vp8_sad32x32x8_c
+#endif
+extern prototype_sad_multi_same_address_1(vp8_variance_sad32x32x8);
+
 #ifndef vp8_variance_sad16x16x8
 #define vp8_variance_sad16x16x8 vp8_sad16x16x8_c
 #endif
@@ -199,6 +214,11 @@ extern prototype_sad_multi_same_address_1(vp8_variance_sad4x4x8);
 // -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
 
+#ifndef vp8_variance_sad32x32x4d
+#define vp8_variance_sad32x32x4d vp8_sad32x32x4d_c
+#endif
+extern prototype_sad_multi_dif_address(vp8_variance_sad32x32x4d);
+
 #ifndef vp8_variance_sad16x16x4d
 #define vp8_variance_sad16x16x4d vp8_sad16x16x4d_c
 #endif
@@ -258,6 +278,11 @@ extern prototype_variance(vp8_variance_var16x8);
 #endif
 extern prototype_variance(vp8_variance_var16x16);
 
+#ifndef vp8_variance_var32x32
+#define vp8_variance_var32x32 vp8_variance32x32_c
+#endif
+extern prototype_variance(vp8_variance_var32x32);
+
 // -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
 
 #ifndef vp8_variance_subpixvar4x4
@@ -285,26 +310,51 @@ extern prototype_subpixvariance(vp8_variance_subpixvar16x8);
 #endif
 extern prototype_subpixvariance(vp8_variance_subpixvar16x16);
 
+#ifndef vp8_variance_subpixvar32x32
+#define vp8_variance_subpixvar32x32 vp8_sub_pixel_variance32x32_c
+#endif
+extern prototype_subpixvariance(vp8_variance_subpixvar32x32);
+
 #ifndef vp8_variance_halfpixvar16x16_h
 #define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_c
 #endif
 extern prototype_variance(vp8_variance_halfpixvar16x16_h);
 
+#ifndef vp8_variance_halfpixvar32x32_h
+#define vp8_variance_halfpixvar32x32_h vp8_variance_halfpixvar32x32_h_c
+#endif
+extern prototype_variance(vp8_variance_halfpixvar32x32_h);
+
 #ifndef vp8_variance_halfpixvar16x16_v
 #define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_c
 #endif
 extern prototype_variance(vp8_variance_halfpixvar16x16_v);
 
+#ifndef vp8_variance_halfpixvar32x32_v
+#define vp8_variance_halfpixvar32x32_v vp8_variance_halfpixvar32x32_v_c
+#endif
+extern prototype_variance(vp8_variance_halfpixvar32x32_v);
+
 #ifndef vp8_variance_halfpixvar16x16_hv
 #define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_c
 #endif
 extern prototype_variance(vp8_variance_halfpixvar16x16_hv);
 
+#ifndef vp8_variance_halfpixvar32x32_hv
+#define vp8_variance_halfpixvar32x32_hv vp8_variance_halfpixvar32x32_hv_c
+#endif
+extern prototype_variance(vp8_variance_halfpixvar32x32_hv);
+
 #ifndef vp8_variance_subpixmse16x16
 #define vp8_variance_subpixmse16x16 vp8_sub_pixel_mse16x16_c
 #endif
 extern prototype_subpixvariance(vp8_variance_subpixmse16x16);
 
+#ifndef vp8_variance_subpixmse32x32
+#define vp8_variance_subpixmse32x32 vp8_sub_pixel_mse32x32_c
+#endif
+extern prototype_subpixvariance(vp8_variance_subpixmse32x32);
+
 // -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
 
 #ifndef vp8_variance_getmbss
@@ -349,38 +399,66 @@ typedef struct {
   vp8_sad_fn_t sad8x16;
   vp8_sad_fn_t sad16x8;
   vp8_sad_fn_t sad16x16;
+#if CONFIG_SUPERBLOCKS
+  vp8_sad_fn_t sad32x32;
+#endif
 
   vp8_variance_fn_t var4x4;
   vp8_variance_fn_t var8x8;
   vp8_variance_fn_t var8x16;
   vp8_variance_fn_t var16x8;
   vp8_variance_fn_t var16x16;
+#if CONFIG_SUPERBLOCKS
+  vp8_variance_fn_t var32x32;
+#endif
 
   vp8_subpixvariance_fn_t subpixvar4x4;
   vp8_subpixvariance_fn_t subpixvar8x8;
   vp8_subpixvariance_fn_t subpixvar8x16;
   vp8_subpixvariance_fn_t subpixvar16x8;
   vp8_subpixvariance_fn_t subpixvar16x16;
+#if CONFIG_SUPERBLOCKS
+  vp8_subpixvariance_fn_t subpixvar32x32;
+#endif
   vp8_variance_fn_t halfpixvar16x16_h;
+#if CONFIG_SUPERBLOCKS
+  vp8_variance_fn_t halfpixvar32x32_h;
+#endif
   vp8_variance_fn_t halfpixvar16x16_v;
+#if CONFIG_SUPERBLOCKS
+  vp8_variance_fn_t halfpixvar32x32_v;
+#endif
   vp8_variance_fn_t halfpixvar16x16_hv;
+#if CONFIG_SUPERBLOCKS
+  vp8_variance_fn_t halfpixvar32x32_hv;
+#endif
   vp8_subpixvariance_fn_t subpixmse16x16;
+#if CONFIG_SUPERBLOCKS
+  vp8_subpixvariance_fn_t subpixmse32x32;
+#endif
   vp8_getmbss_fn_t getmbss;
   vp8_variance_fn_t mse16x16;
 
+#if CONFIG_SUPERBLOCKS
+  vp8_sad_multi_fn_t sad32x32x3;
+#endif
   vp8_sad_multi_fn_t sad16x16x3;
   vp8_sad_multi_fn_t sad16x8x3;
   vp8_sad_multi_fn_t sad8x16x3;
   vp8_sad_multi_fn_t sad8x8x3;
   vp8_sad_multi_fn_t sad4x4x3;
 
+#if CONFIG_SUPERBLOCKS
+  vp8_sad_multi1_fn_t sad32x32x8;
+#endif
   vp8_sad_multi1_fn_t sad16x16x8;
   vp8_sad_multi1_fn_t sad16x8x8;
   vp8_sad_multi1_fn_t sad8x16x8;
   vp8_sad_multi1_fn_t sad8x8x8;
   vp8_sad_multi1_fn_t sad4x4x8;
 
+#if CONFIG_SUPERBLOCKS
+  vp8_sad_multi_d_fn_t sad32x32x4d;
+#endif
   vp8_sad_multi_d_fn_t sad16x16x4d;
   vp8_sad_multi_d_fn_t sad16x8x4d;
   vp8_sad_multi_d_fn_t sad8x16x4d;
diff --git a/vp8/encoder/variance_c.c b/vp8/encoder/variance_c.c
index 0b9d569b018e1312b7511ce16e9b6d302a2084c4..cbe2a51d6f046d78eb624ed36191810cfa2fd150 100644
--- a/vp8/encoder/variance_c.c
+++ b/vp8/encoder/variance_c.c
@@ -55,6 +55,20 @@ static void variance(
   }
 }
 
+#if CONFIG_SUPERBLOCKS
+unsigned int vp8_variance32x32_c(const unsigned char *src_ptr,
+                                 int source_stride,
+                                 const unsigned char *ref_ptr,
+                                 int recon_stride,
+                                 unsigned int *sse) {
+  unsigned int var;
+  int avg;
+
+  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32, &var, &avg);
+  *sse = var;
+  return (var - ((avg * avg) >> 10));
+}
+#endif
 
 unsigned int vp8_variance16x16_c(
   const unsigned char *src_ptr,
@@ -334,6 +348,27 @@ unsigned int vp8_sub_pixel_variance16x16_c
   return vp8_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
 }
 
+#if CONFIG_SUPERBLOCKS
+unsigned int vp8_sub_pixel_variance32x32_c(const unsigned char *src_ptr,
+                                           int src_pixels_per_line,
+                                           int xoffset,
+                                           int yoffset,
+                                           const unsigned char *dst_ptr,
+                                           int dst_pixels_per_line,
+                                           unsigned int *sse) {
+  unsigned short FData3[33 * 32];  // Temp data buffer used in filtering
+  unsigned char temp2[36 * 32];
+  const short *HFilter, *VFilter;
+
+  HFilter = vp8_bilinear_filters[xoffset];
+  VFilter = vp8_bilinear_filters[yoffset];
+
+  var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 33, 32, HFilter);
+  var_filter_block2d_bil_second_pass(FData3, temp2, 32, 32, 32, 32, VFilter);
+
+  return vp8_variance32x32_c(temp2, 32, dst_ptr, dst_pixels_per_line, sse);
+}
+#endif
 
 unsigned int vp8_variance_halfpixvar16x16_h_c(
   const unsigned char *src_ptr,
@@ -345,17 +380,38 @@ unsigned int vp8_variance_halfpixvar16x16_h_c(
                                        ref_ptr, recon_stride, sse);
 }
 
+#if CONFIG_SUPERBLOCKS
+unsigned int vp8_variance_halfpixvar32x32_h_c(const unsigned char *src_ptr,
+                                              int source_stride,
+                                              const unsigned char *ref_ptr,
+                                              int recon_stride,
+                                              unsigned int *sse) {
+  return vp8_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 0,
+                                       ref_ptr, recon_stride, sse);
+}
+#endif
+
-unsigned int vp8_variance_halfpixvar16x16_v_c(
+unsigned int vp8_variance_halfpixvar16x16_v_c(const unsigned char *src_ptr,
+                                              int source_stride,
+                                              const unsigned char *ref_ptr,
+                                              int recon_stride,
+                                              unsigned int *sse) {
+  return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 8,
+                                       ref_ptr, recon_stride, sse);
+}
+
+#if CONFIG_SUPERBLOCKS
+unsigned int vp8_variance_halfpixvar32x32_v_c(
   const unsigned char *src_ptr,
   int source_stride,
   const unsigned char *ref_ptr,
   int recon_stride,
   unsigned int *sse) {
-  return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 8,
+  return vp8_sub_pixel_variance32x32_c(src_ptr, source_stride, 0, 8,
                                        ref_ptr, recon_stride, sse);
 }
-
+#endif
 
 unsigned int vp8_variance_halfpixvar16x16_hv_c(
   const unsigned char *src_ptr,
@@ -367,6 +423,16 @@ unsigned int vp8_variance_halfpixvar16x16_hv_c(
                                        ref_ptr, recon_stride, sse);
 }
 
+#if CONFIG_SUPERBLOCKS
+unsigned int vp8_variance_halfpixvar32x32_hv_c(const unsigned char *src_ptr,
+                                               int source_stride,
+                                               const unsigned char *ref_ptr,
+                                               int recon_stride,
+                                               unsigned int *sse) {
+  return vp8_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 8,
+                                       ref_ptr, recon_stride, sse);
+}
+#endif
 
 unsigned int vp8_sub_pixel_mse16x16_c
 (
@@ -382,6 +448,19 @@ unsigned int vp8_sub_pixel_mse16x16_c
   return *sse;
 }
 
+#if CONFIG_SUPERBLOCKS
+unsigned int vp8_sub_pixel_mse32x32_c(const unsigned char *src_ptr,
+                                      int src_pixels_per_line,
+                                      int xoffset,
+                                      int yoffset,
+                                      const unsigned char *dst_ptr,
+                                      int dst_pixels_per_line,
+                                      unsigned int *sse) {
+  vp8_sub_pixel_variance32x32_c(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
+  return *sse;
+}
+#endif
+
 unsigned int vp8_sub_pixel_variance16x8_c
 (
   const unsigned char *src_ptr,